diff mbox series

[v2] gimple ssa: Don't use __builtin_popcount in switch exp transform

Message ID ZuCAU5thGbE411fk@localhost.localdomain
State New
Headers show
Series [v2] gimple ssa: Don't use __builtin_popcount in switch exp transform | expand

Commit Message

Filip Kastl Sept. 10, 2024, 5:22 p.m. UTC
Hi,

This is the second version of this patch.  See the previous version here:

https://gcc.gnu.org/pipermail/gcc-patches/2024-September/662462.html

In this verison I added a conversion to unsigned type to the bitmagic that I
generate as Andrew suggested.

Bootstrapped and regtested on x86_64-linux.  Ok to push?

Thanks,
Filip Kastl


--- 8< ---


Switch exponential transformation in the switch conversion pass
currently generates

tmp1 = __builtin_popcount (var);
tmp2 = tmp1 == 1;

when inserting code to determine if var is power of two.  If the target
doesn't support expanding the builtin as special instructions switch
conversion relies on this whole pattern being expanded as bitmagic.
However, it is possible that other GIMPLE optimizations move the two
statements of the pattern apart.  In that case the builtin becomes a
libgcc call in the final binary.  The call is slow and in case of
freestanding programs can result in linking error (this bug was
originally found while compiling Linux kernel).

This patch modifies switch conversion to insert the bitmagic
(var ^ (var - 1)) > (var - 1) instead of the builtin.

gcc/ChangeLog:

	PR tree-optimization/116616
	* tree-switch-conversion.cc (can_pow2p): Remove this function.
	(gen_pow2p): Generate bitmagic instead of a builtin.  Remove the
	TYPE parameter.
	(switch_conversion::is_exp_index_transform_viable): Don't call
	can_pow2p.
	(switch_conversion::exp_index_transform): Call gen_pow2p without
	the TYPE parameter.
	* tree-switch-conversion.h: Remove
	m_exp_index_transform_pow2p_type.

gcc/testsuite/ChangeLog:

	PR tree-optimization/116616
	* gcc.target/i386/switch-exp-transform-1.c: Don't test for
	presence of the POPCOUNT internal fn call.

Signed-off-by: Filip Kastl <fkastl@suse.cz>
---
 .../gcc.target/i386/switch-exp-transform-1.c  |  7 +-
 gcc/tree-switch-conversion.cc                 | 84 +++++--------------
 gcc/tree-switch-conversion.h                  |  6 +-
 3 files changed, 23 insertions(+), 74 deletions(-)
diff mbox series

Patch

diff --git a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
index a8c9e03e515..4832f5b52c3 100644
--- a/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
+++ b/gcc/testsuite/gcc.target/i386/switch-exp-transform-1.c
@@ -1,10 +1,8 @@ 
 /* { dg-do compile } */
-/* { dg-options "-O2 -fdump-tree-switchconv -fdump-tree-widening_mul -mpopcnt -mbmi" } */
+/* { dg-options "-O2 -fdump-tree-switchconv -mbmi" } */
 
 /* Checks that exponential index transform enables switch conversion to convert
-   this switch into an array lookup.  Also checks that the "index variable is a
-   power of two" check has been generated and that it has been later expanded
-   into an internal function.  */
+   this switch into an array lookup.  */
 
 int foo(unsigned bar)
 {
@@ -30,4 +28,3 @@  int foo(unsigned bar)
 }
 
 /* { dg-final { scan-tree-dump "CSWTCH" "switchconv" } } */
-/* { dg-final { scan-tree-dump "POPCOUNT" "widening_mul" } } */
diff --git a/gcc/tree-switch-conversion.cc b/gcc/tree-switch-conversion.cc
index c1332a26094..00426d40000 100644
--- a/gcc/tree-switch-conversion.cc
+++ b/gcc/tree-switch-conversion.cc
@@ -133,75 +133,33 @@  gen_log2 (tree op, location_t loc, tree *result, tree type)
   return stmts;
 }
 
-/* Is it possible to efficiently check that a value of TYPE is a power of 2?
-
-   If yes, returns TYPE.  If no, returns NULL_TREE.  May also return another
-   type.  This indicates that logarithm of the variable can be computed but
-   only after it is converted to this type.
-
-   Also see gen_pow2p.  */
-
-static tree
-can_pow2p (tree type)
-{
-  /* __builtin_popcount supports the unsigned type or its long and long long
-     variants.  Choose the smallest out of those that can still fit TYPE.  */
-  int prec = TYPE_PRECISION (type);
-  int i_prec = TYPE_PRECISION (unsigned_type_node);
-  int li_prec = TYPE_PRECISION (long_unsigned_type_node);
-  int lli_prec = TYPE_PRECISION (long_long_unsigned_type_node);
-
-  if (prec <= i_prec)
-    return unsigned_type_node;
-  else if (prec <= li_prec)
-    return long_unsigned_type_node;
-  else if (prec <= lli_prec)
-    return long_long_unsigned_type_node;
-  else
-    return NULL_TREE;
-}
-
-/* Build a sequence of gimple statements checking that OP is a power of 2.  Use
-   special optabs if target supports them.  Return the result as a
-   boolean_type_node ssa name through RESULT.  Assumes that OP's value will
-   be non-negative.  The generated check may give arbitrary answer for negative
-   values.
-
-   Before computing the check, OP may have to be converted to another type.
-   This should be specified in TYPE.  Use can_pow2p to decide what this type
-   should be.
-
-   Should only be used if can_pow2p returns true for type of OP.  */
+/* Build a sequence of gimple statements checking that OP is a power of 2.
+   Return the result as a boolean_type_node ssa name through RESULT.  Assumes
+   that OP's value will be non-negative.  The generated check may give
+   arbitrary answer for negative values.  */
 
 static gimple_seq
-gen_pow2p (tree op, location_t loc, tree *result, tree type)
+gen_pow2p (tree op, location_t loc, tree *result)
 {
   gimple_seq stmts = NULL;
   gimple_stmt_iterator gsi = gsi_last (stmts);
 
-  built_in_function fn;
-  if (type == unsigned_type_node)
-    fn = BUILT_IN_POPCOUNT;
-  else if (type == long_unsigned_type_node)
-    fn = BUILT_IN_POPCOUNTL;
-  else
-    {
-      fn = BUILT_IN_POPCOUNTLL;
-      gcc_checking_assert (type == long_long_unsigned_type_node);
-    }
+  tree type = TREE_TYPE (op);
+  tree utype = unsigned_type_for (type);
 
-  tree orig_type = TREE_TYPE (op);
+  /* Build (op ^ (op - 1)) > (op - 1).  */
   tree tmp1;
-  if (type != orig_type)
-    tmp1 = gimple_convert (&gsi, false, GSI_NEW_STMT, loc, type, op);
-  else
+  if (types_compatible_p (type, utype))
     tmp1 = op;
-  /* Build __builtin_popcount{l,ll} (op) == 1.  */
-  tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc,
-			    as_combined_fn (fn), integer_type_node, tmp1);
-  *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, EQ_EXPR,
-			  boolean_type_node, tmp2,
-			  build_one_cst (integer_type_node));
+  else
+    tmp1 = gimple_convert (&gsi, false, GSI_NEW_STMT, loc, utype, op);
+  tree tmp2 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, MINUS_EXPR, utype,
+			    tmp1, build_one_cst (utype));
+  tree tmp3 = gimple_build (&gsi, false, GSI_NEW_STMT, loc, BIT_XOR_EXPR,
+			    utype, tmp1, tmp2);
+  *result = gimple_build (&gsi, false, GSI_NEW_STMT, loc, GT_EXPR,
+			  boolean_type_node, tmp3, tmp2);
+
   return stmts;
 }
 
@@ -371,9 +329,6 @@  switch_conversion::is_exp_index_transform_viable (gswitch *swtch)
   m_exp_index_transform_log2_type = can_log2 (index_type, opt_type);
   if (!m_exp_index_transform_log2_type)
     return false;
-  m_exp_index_transform_pow2p_type = can_pow2p (index_type);
-  if (!m_exp_index_transform_pow2p_type)
-    return false;
 
   /* Check that each case label corresponds only to one value
      (no case 1..3).  */
@@ -467,8 +422,7 @@  switch_conversion::exp_index_transform (gswitch *swtch)
   new_edge2->probability = profile_probability::even ();
 
   tree tmp;
-  gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp,
-				m_exp_index_transform_pow2p_type);
+  gimple_seq stmts = gen_pow2p (index, UNKNOWN_LOCATION, &tmp);
   gsi = gsi_last_bb (cond_bb);
   gsi_insert_seq_after (&gsi, stmts, GSI_LAST_NEW_STMT);
   gcond *stmt_cond = gimple_build_cond (NE_EXPR, tmp, boolean_false_node,
diff --git a/gcc/tree-switch-conversion.h b/gcc/tree-switch-conversion.h
index 14610499e5f..6468995eb31 100644
--- a/gcc/tree-switch-conversion.h
+++ b/gcc/tree-switch-conversion.h
@@ -920,11 +920,9 @@  public:
   bool m_exp_index_transform_applied;
 
   /* If switch conversion decided exponential index transform is viable, here
-     will be stored the types to which index variable has to be converted
-     before the logarithm and the "is power of 2" operations which are part of
-     the transform.  */
+     will be stored the type to which index variable has to be converted
+     before the logarithm operation which is a part of the transform.  */
   tree m_exp_index_transform_log2_type;
-  tree m_exp_index_transform_pow2p_type;
 };
 
 void