Message ID | patch-18150-tamar@arm.com |
---|---|
State | New |
Headers | show |
Series | middle-end: make memory analysis for early break more deterministic [PR113135] | expand |
On Thu, 11 Jan 2024, Tamar Christina wrote: > Hi All, > > Instead of searching for where to move stores to, they should always be in > exit belonging to the latch. We can only ever delay stores and even if we > pick a different exit than the latch one as the main one, effects still > happen in program order when vectorized. If we don't move the stores to the > latch exit but instead to whever we pick as the "main" exit then we can > perform incorrect memory accesses (luckily these are trapped by verify_ssa). > > We used to iterate over the conds and check the loads and stores inside them. > However this relies on the conds being ordered in program order. Additionally > if there is a basic block between two conds we would not have analyzed it. > > Instead this now walks from the preds of the destination basic block up to the > loop header and analyzes every block along the way. As a later optimization we > could stop as soon as we've seen all the BBs we have conds for. For now the > header will always contain the first cond, but this can change when we support > arbitrary control flow. > > Bootstrapped Regtested on aarch64-none-linux-gnu, x86_64-pc-linux-gnu > and no issues normally and with --enable-checking=release --enable-lto > --with-build-config=bootstrap-O3 --enable-checking=yes,rtl,extra. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR tree-optimization/113135 > * tree-vect-data-refs.cc (vect_analyze_early_break_dependences): Rework > dependency analysis. > > gcc/testsuite/ChangeLog: > > PR tree-optimization/113135 > * gcc.dg/vect/vect-early-break_103-pr113135.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c > new file mode 100644 > index 0000000000000000000000000000000000000000..bbad7ee2cb18086e470f4a2a2dc0a2b345bbdd71 > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-add-options vect_early_break } */ > +/* { dg-require-effective-target vect_early_break } */ > +/* { dg-require-effective-target vect_int } */ > +/* { dg-additional-options "-w" } */ > + > +char UnpackReadTables_BitLength[20]; > +int UnpackReadTables_ZeroCount; > +void UnpackReadTables() { > + for (unsigned I = 0; I < 20;) > + while (UnpackReadTables_ZeroCount-- && > + I < sizeof(UnpackReadTables_BitLength)) > + UnpackReadTables_BitLength[I++] = 0; > +} > diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc > index 3d9673fb0b580ff21ff151dc5c199840df41a1cd..6b76eee72cb7d09de5f443589b4fc3a0e8c2584f 100644 > --- a/gcc/tree-vect-data-refs.cc > +++ b/gcc/tree-vect-data-refs.cc > @@ -671,13 +671,18 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) > "loop contains multiple exits, analyzing" > " statement dependencies.\n"); > > - for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo)) > - { > - stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c); > - if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type) > - continue; > + /* Since we don't support general control flow, the location we'll move the > + side-effects to is always the latch connected exit. When we support > + general control flow we can do better but for now this is fine. */ > + dest_bb = single_pred (loop->latch); > + auto_vec <edge> workset; > + for (auto e: dest_bb->preds) > + workset.safe_push (e); If you handle an arbitrary number of preds here ... > - gimple_stmt_iterator gsi = gsi_for_stmt (c); > + while (!workset.is_empty ()) > + { > + basic_block bb = workset.pop ()->src; > + gimple_stmt_iterator gsi = gsi_last_bb (bb); > > /* Now analyze all the remaining statements and try to determine which > instructions are allowed/needed to be moved. */ > @@ -705,10 +710,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > "early breaks only supported on statically" > " allocated objects.\n"); > - return opt_result::failure_at (c, > + return opt_result::failure_at (stmt, > "can't safely apply code motion to " > "dependencies of %G to vectorize " > - "the early exit.\n", c); > + "the early exit.\n", stmt); > } > > tree refop = TREE_OPERAND (obj, 0); > @@ -720,10 +725,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) > dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, > "early breaks only supported on" > " statically allocated objects.\n"); > - return opt_result::failure_at (c, > + return opt_result::failure_at (stmt, > "can't safely apply code motion to " > "dependencies of %G to vectorize " > - "the early exit.\n", c); > + "the early exit.\n", stmt); > } > > /* Check if vector accesses to the object will be within bounds. > @@ -736,10 +741,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) > "early breaks not supported: vectorization " > "would %s beyond size of obj.", > DR_IS_READ (dr_ref) ? "read" : "write"); > - return opt_result::failure_at (c, > + return opt_result::failure_at (stmt, > "can't safely apply code motion to " > "dependencies of %G to vectorize " > - "the early exit.\n", c); > + "the early exit.\n", stmt); > } > > if (DR_IS_READ (dr_ref)) > @@ -802,26 +807,15 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) > } > } > > - /* Save destination as we go, BB are visited in order and the last one > - is where statements should be moved to. */ > - if (!dest_bb) > - dest_bb = gimple_bb (c); > - else > - { > - basic_block curr_bb = gimple_bb (c); > - if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) > - dest_bb = curr_bb; > - } > + if (bb != loop->header) > + for (auto e: bb->preds) > + workset.safe_push (e); ... or here that wouldn't be correct if we actually had such a case (we don't), since that no longer guarantees we visit stores before loads. Please get rid of the workset and instead do bb = single_pred (bb); to iterate to the next block, stopping at the loop header. OK with that change. Thanks, Richard.
--- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-early-break_103-pr113135.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-add-options vect_early_break } */ +/* { dg-require-effective-target vect_early_break } */ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-w" } */ + +char UnpackReadTables_BitLength[20]; +int UnpackReadTables_ZeroCount; +void UnpackReadTables() { + for (unsigned I = 0; I < 20;) + while (UnpackReadTables_ZeroCount-- && + I < sizeof(UnpackReadTables_BitLength)) + UnpackReadTables_BitLength[I++] = 0; +} diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc index 3d9673fb0b580ff21ff151dc5c199840df41a1cd..6b76eee72cb7d09de5f443589b4fc3a0e8c2584f 100644 --- a/gcc/tree-vect-data-refs.cc +++ b/gcc/tree-vect-data-refs.cc @@ -671,13 +671,18 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) "loop contains multiple exits, analyzing" " statement dependencies.\n"); - for (gimple *c : LOOP_VINFO_LOOP_CONDS (loop_vinfo)) - { - stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (c); - if (STMT_VINFO_TYPE (loop_cond_info) != loop_exit_ctrl_vec_info_type) - continue; + /* Since we don't support general control flow, the location we'll move the + side-effects to is always the latch connected exit. When we support + general control flow we can do better but for now this is fine. */ + dest_bb = single_pred (loop->latch); + auto_vec <edge> workset; + for (auto e: dest_bb->preds) + workset.safe_push (e); - gimple_stmt_iterator gsi = gsi_for_stmt (c); + while (!workset.is_empty ()) + { + basic_block bb = workset.pop ()->src; + gimple_stmt_iterator gsi = gsi_last_bb (bb); /* Now analyze all the remaining statements and try to determine which instructions are allowed/needed to be moved. */ @@ -705,10 +710,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "early breaks only supported on statically" " allocated objects.\n"); - return opt_result::failure_at (c, + return opt_result::failure_at (stmt, "can't safely apply code motion to " "dependencies of %G to vectorize " - "the early exit.\n", c); + "the early exit.\n", stmt); } tree refop = TREE_OPERAND (obj, 0); @@ -720,10 +725,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, "early breaks only supported on" " statically allocated objects.\n"); - return opt_result::failure_at (c, + return opt_result::failure_at (stmt, "can't safely apply code motion to " "dependencies of %G to vectorize " - "the early exit.\n", c); + "the early exit.\n", stmt); } /* Check if vector accesses to the object will be within bounds. @@ -736,10 +741,10 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) "early breaks not supported: vectorization " "would %s beyond size of obj.", DR_IS_READ (dr_ref) ? "read" : "write"); - return opt_result::failure_at (c, + return opt_result::failure_at (stmt, "can't safely apply code motion to " "dependencies of %G to vectorize " - "the early exit.\n", c); + "the early exit.\n", stmt); } if (DR_IS_READ (dr_ref)) @@ -802,26 +807,15 @@ vect_analyze_early_break_dependences (loop_vec_info loop_vinfo) } } - /* Save destination as we go, BB are visited in order and the last one - is where statements should be moved to. */ - if (!dest_bb) - dest_bb = gimple_bb (c); - else - { - basic_block curr_bb = gimple_bb (c); - if (dominated_by_p (CDI_DOMINATORS, curr_bb, dest_bb)) - dest_bb = curr_bb; - } + if (bb != loop->header) + for (auto e: bb->preds) + workset.safe_push (e); } - basic_block dest_bb0 = EDGE_SUCC (dest_bb, 0)->dest; - basic_block dest_bb1 = EDGE_SUCC (dest_bb, 1)->dest; - dest_bb = flow_bb_inside_loop_p (loop, dest_bb0) ? dest_bb0 : dest_bb1; /* We don't allow outer -> inner loop transitions which should have been trapped already during loop form analysis. */ gcc_assert (dest_bb->loop_father == loop); - gcc_assert (dest_bb); LOOP_VINFO_EARLY_BRK_DEST_BB (loop_vinfo) = dest_bb; if (!LOOP_VINFO_EARLY_BRK_VUSES (loop_vinfo).is_empty ())