
[PATCHv2,1/2] cfgexpand: Handle scope conflicts better [PR111422]

Message ID 20241017024205.2660484-1-quic_apinski@quicinc.com
State New
Series [PATCHv2,1/2] cfgexpand: Handle scope conflicts better [PR111422]

Commit Message

Andrew Pinski Oct. 17, 2024, 2:42 a.m. UTC
After fixing loop-im to do the correct overflow rewriting
for pointer types too, we end up with code like:
```
  _9 = (unsigned long) &g;
  _84 = _9 + 18446744073709551615;
  _11 = _42 + _84;
  _44 = (signed char *) _11;
...
  *_44 = 10;
  g ={v} {CLOBBER(eos)};
...
  n[0] = &f;
  *_44 = 8;
  g ={v} {CLOBBER(eos)};
```
This was not being recognized by the scope conflicts code because it
only handled a one-level walk back rather than multiple levels.
This fixes it by using a work list, to avoid huge recursion, and a visited
bitmap, to avoid walking into an infinite loop when dealing with loops.
It also adds a cache of the ADDR_EXPRs associated with each SSA name.  I found
a 2-element cache to be a decent trade-off between size and speed: most SSA
names will have only one address associated with them, but there are times
(PHIs) where 2 or more will be there, and 2 is the common case for most if
statements.
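
For illustration only (not part of the patch), here is a minimal standalone
sketch of the walk: an explicit work list instead of recursion, and a visited
set playing the role of the visited_ssa_names bitmap.  The names below (node,
walk_addrs, is_addr) are invented stand-ins for an SSA name, the walker and an
ADDR_EXPR operand, and the 2-element per-name cache is omitted for brevity.
```
// Standalone sketch, not GCC code: "node" stands in for an SSA name together
// with its defining statement; "is_addr" marks the ADDR_EXPR leaves the walk
// is looking for.
#include <cstdio>
#include <unordered_set>
#include <vector>

struct node
{
  bool is_addr = false;
  std::vector<node *> operands;   // operands of the defining statement
};

// Walk backwards from START using an explicit work list instead of recursion.
// The visited set keeps the walk from cycling forever when PHIs inside loops
// feed back on themselves.  FUNC is called on each address that is found.
template <typename T>
static void
walk_addrs (node *start, T func)
{
  std::vector<node *> work_list {start};
  std::unordered_set<node *> visited;

  while (!work_list.empty ())
    {
      node *use = work_list.back ();
      work_list.pop_back ();
      if (!use)
	continue;
      if (use->is_addr)
	{
	  func (use);		// report an address; the patch also caches it
	  continue;
	}
      if (!visited.insert (use).second)
	continue;		// already walked this name on this walk
      for (node *op : use->operands)
	work_list.push_back (op);
    }
}

int
main ()
{
  node addr {true, {}};
  node sum {false, {&addr}};	 // models _84 = _9 + ...; _9 = (unsigned long) &g
  node phi {false, {&sum}};
  phi.operands.push_back (&phi); // a loop PHI feeding back on itself still terminates
  walk_addrs (&phi, [] (node *) { std::puts ("found address"); });
}
```
The real walker additionally re-pushes the current (name, old_name) pair so
that the per-name address cache is filled in and can be reused on later walks.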

gcc/ChangeLog:

	PR middle-end/111422
	* cfgexpand.cc: Define INCLUDE_STRING if ADDR_WALKER_STATS
	is defined.
	(class addr_ssa_walker): New class.
	(add_scope_conflicts_2): Rename to ...
	(addr_ssa_walker::operator()): ... this, and rewrite to do a full
	walk of all operands and their uses, using a cache.
	(add_scope_conflicts_1): Add new walker argument for the addr cache.
	Just walk the PHI result since that will include all ADDR_EXPRs.
	Change calls to add_scope_conflicts_2 to use the walker.
	(add_scope_conflicts): Add walker variable and update calls to
	add_scope_conflicts_1.

Signed-off-by: Andrew Pinski <quic_apinski@quicinc.com>
---
 gcc/cfgexpand.cc | 207 ++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 176 insertions(+), 31 deletions(-)

Patch

diff --git a/gcc/cfgexpand.cc b/gcc/cfgexpand.cc
index 6c1096363af..74f4cfc0f22 100644
--- a/gcc/cfgexpand.cc
+++ b/gcc/cfgexpand.cc
@@ -17,6 +17,9 @@  You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+#ifdef ADDR_WALKER_STATS
+#define INCLUDE_STRING
+#endif
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -571,35 +574,175 @@  visit_conflict (gimple *, tree op, tree, void *data)
   return false;
 }
 
-/* Helper function for add_scope_conflicts_1.  For USE on
-   a stmt, if it is a SSA_NAME and in its SSA_NAME_DEF_STMT is known to be
-   based on some ADDR_EXPR, invoke VISIT on that ADDR_EXPR.  */
+namespace {
 
-static inline void
-add_scope_conflicts_2 (tree use, bitmap work,
-		       walk_stmt_load_store_addr_fn visit)
+class addr_ssa_walker
+{
+private:
+  struct addr_cache
+  {
+  private:
+    unsigned elems = 0;
+    static constexpr unsigned maxelements = 2;
+    bool visited = false;
+    tree cached[maxelements] = {};
+  public:
+    /* Returns true if the cache is valid. */
+    operator bool ()
+    {
+      return visited && elems <= maxelements;
+    }
+    /* Mark as visited.  The cache can still be invalidated
+       by adding too many elements, though.  */
+    void visit () { visited = true; }
+    /* Iterator over the cached values. */
+    tree *begin () { return &cached[0]; }
+    tree *end ()
+    {
+      /* If there were too many elements, then there is
+	 nothing to visit in the cache.  */
+      if (elems > maxelements)
+	return &cached[0];
+      return &cached[elems];
+    }
+    /* Add ADDR_EXPR to the cache if it is not there already. */
+    void add (tree addr)
+    {
+      if (elems > maxelements)
+	{
+	  statistics_counter_event (cfun, "addr_walker already overflow", 1);
+	  return;
+	}
+      /* Skip if the cache already contains the addr_expr. */
+      for (tree other : *this)
+	if (operand_equal_p (other, addr))
+	  return;
+      elems++;
+      /* Record that the cache overflowed. */
+      if (elems > maxelements)
+	{
+	  statistics_counter_event (cfun, "addr_walker overflow", 1);
+	  return;
+	}
+      cached[elems - 1] = addr;
+    }
+  };
+public:
+  addr_ssa_walker () : cache (new addr_cache[num_ssa_names]{}) { }
+  ~addr_ssa_walker () { delete[] cache; }
+
+  /* Walk NAME and its defining statement and
+     call FUNC on each ADDR_EXPR found.  */
+  template<typename T>
+  void operator ()(tree name, T func);
+
+private:
+
+  /* Cannot create a copy. */
+  addr_ssa_walker (const addr_ssa_walker &) = delete;
+  addr_ssa_walker (addr_ssa_walker &&) = delete;
+  /* Return the cache entries for a SSA NAME. */
+  addr_cache &operator[] (tree name)
+  {
+    return cache[SSA_NAME_VERSION (name)];
+  }
+
+  addr_cache *cache;
+};
+
+/* Walk backwards through the defining statements of NAME
+   and call FUNC on each ADDR_EXPR found.  Use the cache for
+   the SSA name if possible.  */
+
+template<typename T>
+void
+addr_ssa_walker::operator() (tree name, T func)
 {
-  if (TREE_CODE (use) == SSA_NAME
-      && (POINTER_TYPE_P (TREE_TYPE (use))
-	  || INTEGRAL_TYPE_P (TREE_TYPE (use))))
+  gcc_assert (TREE_CODE (name) == SSA_NAME);
+  auto_vec<std::pair<tree,tree>, 4> work_list;
+  auto_bitmap visited_ssa_names;
+  work_list.safe_push (std::make_pair (name, name));
+
+#ifdef ADDR_WALKER_STATS
+  unsigned process_list = 0;
+#endif
+
+  while (!work_list.is_empty())
     {
+      auto work = work_list.pop();
+      tree use = work.first;
+      tree old_name = work.second;
+#ifdef ADDR_WALKER_STATS
+      process_list++;
+#endif
+
+      if (!use)
+	continue;
+      /* For ADDR_EXPRs, call the function and update the cache.  */
+      if (TREE_CODE (use) == ADDR_EXPR)
+	{
+	  func (use);
+	  (*this)[old_name].add (use);
+	  continue;
+	}
+      /* Ignore all non-SSA names.  */
+      if (TREE_CODE (use) != SSA_NAME)
+	continue;
+
+      /* Only pointer and integral types are used to track addresses.  */
+      if (!POINTER_TYPE_P (TREE_TYPE (use))
+	  && !INTEGRAL_TYPE_P (TREE_TYPE (use)))
+	continue;
+
+      /* Check the cache; if there is a hit, use it.  */
+      if ((*this)[use])
+	{
+	  statistics_counter_event (cfun, "addr_walker cache hit", 1);
+	  /* Call the function and update the cache.  */
+	  for (tree naddr : (*this)[use])
+	    {
+	      (*this)[old_name].add (naddr);
+	      func (naddr);
+	    }
+	  continue;
+	}
       gimple *g = SSA_NAME_DEF_STMT (use);
+      /* Mark the use as visited so that, even if the cache is empty,
+	 there is no reason to walk this name backwards again.  */
+      (*this)[use].visit();
+      /* Skip the name if it was already visited this time.  */
+      if (!bitmap_set_bit (visited_ssa_names, SSA_NAME_VERSION (use)))
+	continue;
+      /* OLD_NAME's cache needs updating afterwards, so push the work item
+	 back onto the work list; it will then either hit the cache or be
+	 skipped via the visited bitmap if the cache overflowed.  */
+      work_list.safe_push (work);
+
+      /* For assign statements, add each operand to the work list.
+	 Note operand 0 is the lhs, which is the use itself, so there
+	 is nothing to be done for it.  */
       if (gassign *a = dyn_cast <gassign *> (g))
 	{
-	  if (tree op = gimple_assign_rhs1 (a))
-	    if (TREE_CODE (op) == ADDR_EXPR)
-	      visit (a, TREE_OPERAND (op, 0), op, work);
+	  for (unsigned i = 1; i < gimple_num_ops (g); i++)
+	    work_list.safe_push (std::make_pair (gimple_op (a, i), use));
 	}
+      /* PHI nodes, add each argument to the work list.  */
       else if (gphi *p = dyn_cast <gphi *> (g))
 	for (unsigned i = 0; i < gimple_phi_num_args (p); ++i)
-	  if (TREE_CODE (use = gimple_phi_arg_def (p, i)) == SSA_NAME)
-	    if (gassign *a = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (use)))
-	      {
-		if (tree op = gimple_assign_rhs1 (a))
-		  if (TREE_CODE (op) == ADDR_EXPR)
-		    visit (a, TREE_OPERAND (op, 0), op, work);
-	      }
-    }
+	  work_list.safe_push (std::make_pair (gimple_phi_arg_def (p, i), use));
+      /* For all other kinds of statements, assume they can't contribute
+	 to an address being alive.  */
+    }
+  /* This stat is here to see how long the longest walk is;
+     it is not a useful stat except when tuning
+     addr_ssa_walker::addr_cache::maxelements.  */
+#ifdef ADDR_WALKER_STATS
+  statistics_counter_event (cfun,
+			    ("addr_walker process " + std::to_string (process_list)).c_str (),
+			    1);
+#endif
+}
+
 }
 
 /* Helper routine for add_scope_conflicts, calculating the active partitions
@@ -608,7 +751,8 @@  add_scope_conflicts_2 (tree use, bitmap work,
    liveness.  */
 
 static void
-add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
+add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict,
+		       addr_ssa_walker &walker)
 {
   edge e;
   edge_iterator ei;
@@ -623,14 +767,14 @@  add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
 
   visit = visit_op;
 
+  auto addrvisitor = [&visit,&work](tree addr) {
+		   gcc_assert (TREE_CODE (addr) == ADDR_EXPR);
+		   visit (nullptr, TREE_OPERAND (addr, 0), addr, work);
+		 };
+
+  /* Walk over the PHI results so the incoming addresses are considered alive.  */
   for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-    {
-      gimple *stmt = gsi_stmt (gsi);
-      gphi *phi = as_a <gphi *> (stmt);
-      walk_stmt_load_store_addr_ops (stmt, work, NULL, NULL, visit);
-      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
-	add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit);
-    }
+    walker (gimple_phi_result (gsi_stmt (gsi)), addrvisitor);
   for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
     {
       gimple *stmt = gsi_stmt (gsi);
@@ -676,7 +820,7 @@  add_scope_conflicts_1 (basic_block bb, bitmap work, bool for_conflict)
 	    }
 	  walk_stmt_load_store_addr_ops (stmt, work, visit, visit, visit);
 	  FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
-	    add_scope_conflicts_2 (USE_FROM_PTR (use_p), work, visit);
+	    walker (USE_FROM_PTR (use_p), addrvisitor);
 	}
     }
 
@@ -707,6 +851,7 @@  add_scope_conflicts (void)
   bitmap work = BITMAP_ALLOC (NULL);
   int *rpo;
   int n_bbs;
+  addr_ssa_walker walker;
 
   /* We approximate the live range of a stack variable by taking the first
      mention of its name as starting point(s), and by the end-of-scope
@@ -734,14 +879,14 @@  add_scope_conflicts (void)
 	  bitmap active;
 	  bb = BASIC_BLOCK_FOR_FN (cfun, rpo[i]);
 	  active = (bitmap)bb->aux;
-	  add_scope_conflicts_1 (bb, work, false);
+	  add_scope_conflicts_1 (bb, work, false, walker);
 	  if (bitmap_ior_into (active, work))
 	    changed = true;
 	}
     }
 
   FOR_EACH_BB_FN (bb, cfun)
-    add_scope_conflicts_1 (bb, work, true);
+    add_scope_conflicts_1 (bb, work, true, walker);
 
   free (rpo);
   BITMAP_FREE (work);