diff mbox series

[3/5] Add LOOP_VINFO_MAIN_LOOP_INFO

Message ID 20241106143118.D2B3C3858C39@sourceware.org
State New
Headers show
Series [1/5] Check LOOP_VINFO_PEELING_FOR_GAPS on epilog is supported | expand

Commit Message

Richard Biener Nov. 6, 2024, 2:29 p.m. UTC
The following introduces LOOP_VINFO_MAIN_LOOP_INFO alongside
LOOP_VINFO_ORIG_LOOP_INFO so one can have both access to the main
vectorized loop info and the preceeding vectorized epilogue.
This is critical for correctness as we need to disallow never
executed epilogues by costing in vect_analyze_loop_costing as
we assume those do not exist when deciding to add a skip-vector
edge during peeling.  The patch also changes how multiple vector
epilogues are handled - instead of the epilogue_vinfos array in
the main loop info we now record the single epilogue_vinfo there
and further epilogues in the epilogue_vinfo member of the
epilogue info.  This simplifies code.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

	* tree-vectorizer.h (_loop_vec_info::main_loop_info): New.
	(LOOP_VINFO_MAIN_LOOP_INFO): Likewise.
	(_loop_vec_info::epilogue_vinfo): Change from epilogue_vinfos
	from array to single element.
	* tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Initialize
	main_loop_info and epilogue_vinfo.  Remove epilogue_vinfos
	allocation.
	(_loop_vec_info::~_loop_vec_info): Do not release epilogue_vinfos.
	(vect_create_loop_vinfo): Rename parameter, set
	LOOP_VINFO_MAIN_LOOP_INFO.
	(vect_analyze_loop_1): Rename parameter.
	(vect_analyze_loop_costing): Properly distinguish between
	the main vector loop and the preceeding epilogue.
	(vect_analyze_loop): Change for epilogue_vinfos no longer
	being a vector.
	* tree-vect-loop-manip.cc (vect_do_peeling): Simplify and
	thereby handle a vector epilogue of a vector epilogue.
---
 gcc/tree-vect-loop-manip.cc | 22 +++++-------
 gcc/tree-vect-loop.cc       | 67 ++++++++++++++++++++-----------------
 gcc/tree-vectorizer.h       | 12 +++++--
 3 files changed, 53 insertions(+), 48 deletions(-)
diff mbox series

Patch

diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc
index 5bbeeddd854..c8dc7153298 100644
--- a/gcc/tree-vect-loop-manip.cc
+++ b/gcc/tree-vect-loop-manip.cc
@@ -3100,12 +3100,12 @@  vect_get_main_loop_result (loop_vec_info loop_vinfo, tree main_loop_value,
    The analysis resulting in this epilogue loop's loop_vec_info was performed
    in the same vect_analyze_loop call as the main loop's.  At that time
    vect_analyze_loop constructs a list of accepted loop_vec_info's for lower
-   vectorization factors than the main loop.  This list is stored in the main
-   loop's loop_vec_info in the 'epilogue_vinfos' member.  Everytime we decide to
-   vectorize the epilogue loop for a lower vectorization factor,  the
-   loop_vec_info sitting at the top of the epilogue_vinfos list is removed,
-   updated and linked to the epilogue loop.  This is later used to vectorize
-   the epilogue.  The reason the loop_vec_info needs updating is that it was
+   vectorization factors than the main loop.  This list is chained in the
+   loop's loop_vec_info in the 'epilogue_vinfo' member.  When we decide to
+   vectorize the epilogue loop for a lower vectorization factor, the
+   loop_vec_info in epilogue_vinfo is updated and linked to the epilogue loop.
+   This is later used to vectorize the epilogue.
+   The reason the loop_vec_info needs updating is that it was
    constructed based on the original main loop, and the epilogue loop is a
    copy of this loop, so all links pointing to statements in the original loop
    need updating.  Furthermore, these loop_vec_infos share the
@@ -3128,7 +3128,7 @@  vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   profile_probability prob_prolog, prob_vector, prob_epilog;
   int estimated_vf;
   int prolog_peeling = 0;
-  bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0;
+  bool vect_epilogues = loop_vinfo->epilogue_vinfo != NULL;
   /* We currently do not support prolog peeling if the target alignment is not
      known at compile time.  'vect_gen_prolog_loop_niters' depends on the
      target alignment being constant.  */
@@ -3255,13 +3255,7 @@  vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
   else
     niters_prolog = build_int_cst (type, 0);
 
-  loop_vec_info epilogue_vinfo = NULL;
-  if (vect_epilogues)
-    {
-      epilogue_vinfo = loop_vinfo->epilogue_vinfos[0];
-      loop_vinfo->epilogue_vinfos.ordered_remove (0);
-    }
-
+  loop_vec_info epilogue_vinfo = loop_vinfo->epilogue_vinfo;
   tree niters_vector_mult_vf = NULL_TREE;
   /* Saving NITERs before the loop, as this may be changed by prologue.  */
   tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo);
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 6059ce031d1..5de90b6573a 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -1071,7 +1071,9 @@  _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
     has_mask_store (false),
     scalar_loop_scaling (profile_probability::uninitialized ()),
     scalar_loop (NULL),
+    main_loop_info (NULL),
     orig_loop_info (NULL),
+    epilogue_vinfo (NULL),
     drs_advanced_by (NULL_TREE),
     vec_loop_iv_exit (NULL),
     vec_epilogue_loop_iv_exit (NULL),
@@ -1128,8 +1130,6 @@  _loop_vec_info::_loop_vec_info (class loop *loop_in, vec_info_shared *shared)
 	    }
 	}
     }
-
-  epilogue_vinfos.create (6);
 }
 
 /* Free all levels of rgroup CONTROLS.  */
@@ -1155,7 +1155,6 @@  _loop_vec_info::~_loop_vec_info ()
   release_vec_loop_controls (&lens);
   delete ivexpr_map;
   delete scan_map;
-  epilogue_vinfos.release ();
   delete scalar_costs;
   delete vector_costs;
 
@@ -1936,15 +1935,20 @@  vect_analyze_loop_form (class loop *loop, gimple *loop_vectorized_call,
 loop_vec_info
 vect_create_loop_vinfo (class loop *loop, vec_info_shared *shared,
 			const vect_loop_form_info *info,
-			loop_vec_info main_loop_info)
+			loop_vec_info orig_loop_info)
 {
   loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
   LOOP_VINFO_NITERSM1 (loop_vinfo) = info->number_of_iterationsm1;
   LOOP_VINFO_NITERS (loop_vinfo) = info->number_of_iterations;
   LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = info->number_of_iterations;
-  LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = main_loop_info;
+  LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_info;
+  if (orig_loop_info && LOOP_VINFO_EPILOGUE_P (orig_loop_info))
+    LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo)
+      = LOOP_VINFO_MAIN_LOOP_INFO (orig_loop_info);
+  else
+    LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo) = orig_loop_info;
   /* Also record the assumptions for versioning.  */
-  if (!integer_onep (info->assumptions) && !main_loop_info)
+  if (!integer_onep (info->assumptions) && !orig_loop_info)
     LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo) = info->assumptions;
 
   for (gcond *cond : info->conds)
@@ -2314,17 +2318,19 @@  vect_analyze_loop_costing (loop_vec_info loop_vinfo,
 	{
 	  loop_vec_info orig_loop_vinfo
 	    = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo);
+	  loop_vec_info main_loop_vinfo
+	    = LOOP_VINFO_MAIN_LOOP_INFO (loop_vinfo);
 	  unsigned lowest_vf
 	    = constant_lower_bound (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo));
 	  int prolog_peeling = 0;
-	  if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
-	    prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (orig_loop_vinfo);
+	  if (!vect_use_loop_mask_for_alignment_p (main_loop_vinfo))
+	    prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (main_loop_vinfo);
 	  if (prolog_peeling >= 0
 	      && known_eq (LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo),
 			   lowest_vf))
 	    {
 	      unsigned gap
-		= LOOP_VINFO_PEELING_FOR_GAPS (orig_loop_vinfo) ? 1 : 0;
+		= LOOP_VINFO_PEELING_FOR_GAPS (main_loop_vinfo) ? 1 : 0;
 	      scalar_niters = ((scalar_niters - gap - prolog_peeling)
 			       % lowest_vf + gap);
 	    }
@@ -3440,7 +3446,7 @@  vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
   return true;
 }
 
-/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if MAIN_LOOP_VINFO is
+/* Analyze LOOP with VECTOR_MODES[MODE_I] and as epilogue if ORIG_LOOP_VINFO is
    not NULL.  Set AUTODETECTED_VECTOR_MODE if VOIDmode and advance
    MODE_I to the next mode useful to analyze.
    Return the loop_vinfo on success and wrapped null on failure.  */
@@ -3448,13 +3454,13 @@  vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo,
 static opt_loop_vec_info
 vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
 		     const vect_loop_form_info *loop_form_info,
-		     loop_vec_info main_loop_vinfo,
+		     loop_vec_info orig_loop_vinfo,
 		     const vector_modes &vector_modes, unsigned &mode_i,
 		     machine_mode &autodetected_vector_mode,
 		     bool &fatal)
 {
   loop_vec_info loop_vinfo
-    = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
+    = vect_create_loop_vinfo (loop, shared, loop_form_info, orig_loop_vinfo);
 
   machine_mode vector_mode = vector_modes[mode_i];
   loop_vinfo->vector_mode = vector_mode;
@@ -3471,7 +3477,7 @@  vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
 		     res ? "succeeded" : "failed",
 		     GET_MODE_NAME (loop_vinfo->vector_mode));
 
-  if (res && !main_loop_vinfo && suggested_unroll_factor > 1)
+  if (res && !LOOP_VINFO_EPILOGUE_P (loop_vinfo) && suggested_unroll_factor > 1)
     {
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
@@ -3480,7 +3486,7 @@  vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
 			 suggested_unroll_factor,
 			 slp_done_for_suggested_uf ? "on" : "off");
       loop_vec_info unroll_vinfo
-	= vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo);
+	= vect_create_loop_vinfo (loop, shared, loop_form_info, NULL);
       unroll_vinfo->vector_mode = vector_mode;
       unroll_vinfo->suggested_unroll_factor = suggested_unroll_factor;
       opt_result new_res = vect_analyze_loop_2 (unroll_vinfo, fatal, NULL,
@@ -3534,7 +3540,7 @@  vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
     {
       delete loop_vinfo;
       if (fatal)
-	gcc_checking_assert (main_loop_vinfo == NULL);
+	gcc_checking_assert (orig_loop_vinfo == NULL);
       return opt_loop_vec_info::propagate_failure (res);
     }
 
@@ -3746,21 +3752,17 @@  vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
 
       if (loop_vinfo)
 	{
-	  if (pick_lowest_cost_p)
+	  if (pick_lowest_cost_p
+	      && orig_loop_vinfo->epilogue_vinfo
+	      && vect_joust_loop_vinfos (loop_vinfo,
+					 orig_loop_vinfo->epilogue_vinfo))
 	    {
-	      /* Keep trying to roll back vectorization attempts while the
-		 loop_vec_infos they produced were worse than this one.  */
-	      vec<loop_vec_info> &vinfos = first_loop_vinfo->epilogue_vinfos;
-	      while (!vinfos.is_empty ()
-		     && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ()))
-		{
-		  gcc_assert (vect_epilogues);
-		  delete vinfos.pop ();
-		}
+	      gcc_assert (vect_epilogues);
+	      delete orig_loop_vinfo->epilogue_vinfo;
+	      orig_loop_vinfo->epilogue_vinfo = nullptr;
 	    }
-	  /* For now only allow one epilogue loop.  */
-	  if (first_loop_vinfo->epilogue_vinfos.is_empty ())
-	    first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo);
+	  if (!orig_loop_vinfo->epilogue_vinfo)
+	    orig_loop_vinfo->epilogue_vinfo = loop_vinfo;
 	  else
 	    {
 	      delete loop_vinfo;
@@ -3779,11 +3781,12 @@  vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
 
     }
 
-  if (!first_loop_vinfo->epilogue_vinfos.is_empty ())
+  if (first_loop_vinfo->epilogue_vinfo)
     {
       poly_uint64 lowest_th
 	= LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
-      for (auto epilog_vinfo : first_loop_vinfo->epilogue_vinfos)
+      loop_vec_info epilog_vinfo = first_loop_vinfo->epilogue_vinfo;
+      do
 	{
 	  poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (epilog_vinfo);
 	  gcc_assert (!LOOP_REQUIRES_VERSIONING (epilog_vinfo)
@@ -3791,13 +3794,15 @@  vect_analyze_loop (class loop *loop, gimple *loop_vectorized_call,
 	  /* Keep track of the known smallest versioning threshold.  */
 	  if (ordered_p (lowest_th, th))
 	    lowest_th = ordered_min (lowest_th, th);
+	  epilog_vinfo = epilog_vinfo->epilogue_vinfo;
 	}
+      while (epilog_vinfo);
       LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th;
       if (dump_enabled_p ())
 	dump_printf_loc (MSG_NOTE, vect_location,
 			 "***** Choosing epilogue vector mode %s\n",
 			 GET_MODE_NAME
-			   (first_loop_vinfo->epilogue_vinfos[0]->vector_mode));
+			   (first_loop_vinfo->epilogue_vinfo->vector_mode));
     }
 
   return first_loop_vinfo;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 5a1bd237beb..88db34b87b6 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -987,12 +987,17 @@  public:
   class loop *scalar_loop;
 
   /* For loops being epilogues of already vectorized loops
-     this points to the original vectorized loop.  Otherwise NULL.  */
+     this points to the main vectorized loop.  Otherwise NULL.  */
+  _loop_vec_info *main_loop_info;
+
+  /* For loops being epilogues of already vectorized loops
+     this points to the preceeding vectorized (possibly epilogue) loop.
+     Otherwise NULL.  */
   _loop_vec_info *orig_loop_info;
 
-  /* Used to store loop_vec_infos of epilogues of this loop during
+  /* Used to store loop_vec_infos of the epilogue of this loop during
      analysis.  */
-  vec<_loop_vec_info *> epilogue_vinfos;
+  _loop_vec_info *epilogue_vinfo;
 
   /* If this is an epilogue loop the DR advancement applied.  */
   tree drs_advanced_by;
@@ -1096,6 +1101,7 @@  public:
 #define LOOP_VINFO_SCALAR_LOOP_SCALING(L)  (L)->scalar_loop_scaling
 #define LOOP_VINFO_HAS_MASK_STORE(L)       (L)->has_mask_store
 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec
+#define LOOP_VINFO_MAIN_LOOP_INFO(L)       (L)->main_loop_info
 #define LOOP_VINFO_ORIG_LOOP_INFO(L)       (L)->orig_loop_info
 #define LOOP_VINFO_SIMD_IF_COND(L)         (L)->simd_if_cond
 #define LOOP_VINFO_INNER_LOOP_COST_FACTOR(L) (L)->inner_loop_cost_factor