diff mbox

More pow(x,c) expansions in cse_sincos pass (PR46728, patch 3)

Message ID 1306345412.4821.56.camel@L3G5336.ibm.com
State New
Headers show

Commit Message

Bill Schmidt May 25, 2011, 5:43 p.m. UTC
This patch adds logic to gimple_expand_builtin_pow () to optimize
pow(x,y), where y is one of 0.5, 0.25, 0.75, 1./3., or 1./6.  I noticed
that there were two missing calls to gimple_set_location () in my
previous patch, so I've corrected those here as well.

There's one TODO comment in this patch.  I don't believe the test for
TREE_SIDE_EFFECTS (arg0) should be necessary; but I'm not convinced it
was necessary in the code whence I copied it, either, so I left it in
for comment in case I'm misunderstanding something.


2011-05-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>

	PR tree-optimization/46728
	* tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
	(powi_as_mults): Add gimple_set_location.
	(build_and_insert_call): New.
	(gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
	0.5, 0.25, 0.75, 1./3., or 1./6.

Comments

Richard Biener May 26, 2011, 9:22 a.m. UTC | #1
On Wed, May 25, 2011 at 7:43 PM, William J. Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> This patch adds logic to gimple_expand_builtin_pow () to optimize
> pow(x,y), where y is one of 0.5, 0.25, 0.75, 1./3., or 1./6.  I noticed
> that there were two missing calls to gimple_set_location () in my
> previous patch, so I've corrected those here as well.
>
> There's one TODO comment in this patch.  I don't believe the test for
> TREE_SIDE_EFFECTS (arg0) should be necessary; but I'm not convinced it
> was necessary in the code whence I copied it, either, so I left it in
> for comment in case I'm misunderstanding something.
>
>
> 2011-05-25  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
>
>        PR tree-optimization/46728
>        * tree-ssa-math-opts.c (powi_as_mults_1): Add gimple_set_location.
>        (powi_as_mults): Add gimple_set_location.
>        (build_and_insert_call): New.
>        (gimple_expand_builtin_pow): Add handling for pow(x,y) when y is
>        0.5, 0.25, 0.75, 1./3., or 1./6.
>
> Index: gcc/tree-ssa-math-opts.c
> ===================================================================
> --- gcc/tree-ssa-math-opts.c    (revision 174199)
> +++ gcc/tree-ssa-math-opts.c    (working copy)
> @@ -965,6 +965,7 @@ powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
>     }
>
>   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
> +  gimple_set_location (mult_stmt, loc);
>   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
>
>   return ssa_target;
> @@ -999,6 +1000,7 @@ powi_as_mults (gimple_stmt_iterator *gsi, location
>   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
>                                           build_real (type, dconst1),
>                                           result);
> +  gimple_set_location (div_stmt, loc);
>   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
>
>   return target;
> @@ -1024,6 +1026,34 @@ gimple_expand_builtin_powi (gimple_stmt_iterator *
>   return NULL_TREE;
>  }
>
> +/* Build a gimple call statement that calls FN with argument ARG.
> +   Set the lhs of the call statement to a fresh SSA name for
> +   variable VAR.  If VAR is NULL, first allocate it.  Insert the
> +   statement prior to GSI's current position, and return the fresh
> +   SSA name.  */
> +
> +static tree
> +build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
> +                      tree *var, location_t loc)
> +{
> +  gimple call_stmt;
> +  tree ssa_target;
> +
> +  if (!*var)
> +    {
> +      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
> +      add_referenced_var (*var);
> +    }
> +
> +  call_stmt = gimple_build_call (fn, 1, arg);
> +  ssa_target = make_ssa_name (*var, NULL);
> +  gimple_set_lhs (call_stmt, ssa_target);
> +  gimple_set_location (call_stmt, loc);
> +  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
> +
> +  return ssa_target;
> +}
> +
>  /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
>    with location info LOC.  If possible, create an equivalent and
>    less expensive sequence of statements prior to GSI, and return an
> @@ -1035,6 +1065,8 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>  {
>   REAL_VALUE_TYPE c, cint;
>   HOST_WIDE_INT n;
> +  tree type, sqrtfn, target = NULL_TREE;
> +  enum machine_mode mode;
>
>   /* If the exponent isn't a constant, there's nothing of interest
>      to be done.  */
> @@ -1054,6 +1086,108 @@ gimple_expand_builtin_pow (gimple_stmt_iterator *g
>              && powi_cost (n) <= POWI_MAX_MULTS)))
>     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
>
> +  /* Attempt various optimizations using sqrt and cbrt.  */
> +  type = TREE_TYPE (arg0);
> +  mode = TYPE_MODE (type);
> +  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
> +
> +  if (flag_unsafe_math_optimizations && sqrtfn != NULL_TREE)
> +    {
> +      REAL_VALUE_TYPE dconst1_4, dconst3_4;
> +      tree cbrtfn;
> +      bool hw_sqrt_exists;
> +
> +      /* Optimize pow(x,0.5) = sqrt(x).  */
> +      if (REAL_VALUES_EQUAL (c, dconsthalf))
> +       return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);

This should be done if !HONOR_SIGNED_ZEROS instead.

> +
> +      /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  */
> +      dconst1_4 = dconst1;
> +      SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
> +      hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
> +
> +      if (REAL_VALUES_EQUAL (c, dconst1_4) && hw_sqrt_exists)
> +       {
> +         tree sqrt_arg0;
> +
> +         /* sqrt(x)  */
> +         sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +         /* sqrt(sqrt(x))  */
> +         return build_and_insert_call (gsi, sqrtfn, sqrt_arg0, &target, loc);
> +       }
> +
> +      /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)).  */
> +      real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
> +      SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
> +
> +      if (optimize_function_for_speed_p (cfun)
> +         && !TREE_SIDE_EFFECTS (arg0) /* TODO: is this needed?  */

Not needed as we are in gimple form.

> +         && REAL_VALUES_EQUAL (c, dconst3_4)
> +         && hw_sqrt_exists)
> +       {
> +         tree sqrt_arg0, sqrt_sqrt, ssa_target;
> +         gimple mult_stmt;
> +
> +         /* sqrt(x)  */
> +         sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
> +
> +         /* sqrt(sqrt(x))  */
> +         sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0,
> +                                            &target, loc);
> +         /* sqrt(x) * sqrt(sqrt(x))  */
> +         ssa_target = make_ssa_name (target, NULL);
> +         mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
> +                                                   sqrt_arg0, sqrt_sqrt);
> +         gimple_set_location (mult_stmt, loc);
> +         gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
> +
> +         return ssa_target;
> +       }
> +
> +      /* Optimize pow(x,1./3.) = cbrt(x), and pow(x,1./6.) = cbrt(sqrt(x)).  */
> +      cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
> +
> +      if (cbrtfn != NULL_TREE)
> +       {
> +         /* pow(x,1./3.) => cbrt(x).  */
> +         const REAL_VALUE_TYPE dconst1_3
> +           = real_value_truncate (mode, dconst_third ());
> +
> +         if (REAL_VALUES_EQUAL (c, dconst1_3))
> +           return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
> +
> +         /* pow(x,1./6.) => cbrt(sqrt(x)).  */
> +         if (optimize_function_for_speed_p (cfun) && hw_sqrt_exists)
> +           {
> +             REAL_VALUE_TYPE dconst1_6 = dconst1_3;
> +             SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
> +
> +             if (REAL_VALUES_EQUAL (c, dconst1_6))
> +               {
> +                 tree sqrt_arg0;
> +
> +                 /* sqrt(x)  */
> +                 sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0,
> +                                                    &target, loc);
> +                 /* cbrt(sqrt(x))  */
> +                 return build_and_insert_call (gsi, cbrtfn, sqrt_arg0,
> +                                               &target, loc);
> +               }
> +           }
> +       }
> +    }

There are several extra pre-conditions in the original code in
builtins.c as well as commens for reasonings (yes, there seem
to be duplicates of several of the transforms there ...).  Please
try to preserve them.  I noticed especially

              /* Optimize pow (x, 0.25) into sqrt (sqrt (x)).  Assume on most
                 machines that a builtin sqrt instruction is smaller than a
                 call to pow with 0.25, so do this optimization even if
                 -Os.  */

and

      /* Check whether we can do cbrt insstead of pow (x, 1./3.) and
         cbrt/sqrts instead of pow (x, 1./6.).  */
      if (cbrtfn && ! op
          && (tree_expr_nonnegative_p (arg0) || !HONOR_NANS (mode)))

where you omit the non-negative check and the check for NANs.
Note that -funsafe-math-optimizations does not mean you can
ignore nans or signed zero issues.  pow(x,1/3) -> cbrt(x) does not
need a -funsafe-math-optimization check either.

It would be nice if you could re-arrange this function so that before
each individual replacement all preconditions are checked (even if
that means duplicating some checks and code) - that way
reviewing and later maintainance should be much easier.

Thanks,
Richard.

> +  /* We allow one transformation when flag_unsafe_math_optimizations is
> +     false.  Replacing pow(x,0.5) with sqrt(x) is safe, provided signed
> +     zeros must not be maintained.  For x = -0, the former produces +0,
> +     and the latter produces -0.  */
> +  else if (sqrtfn != NULL_TREE
> +          && !HONOR_SIGNED_ZEROS (mode)
> +          && REAL_VALUES_EQUAL (c, dconsthalf))
> +
> +    return build_and_insert_call (gsi, sqrtfn, arg0, &target, l
diff mbox

Patch

Index: gcc/tree-ssa-math-opts.c
===================================================================
--- gcc/tree-ssa-math-opts.c	(revision 174199)
+++ gcc/tree-ssa-math-opts.c	(working copy)
@@ -965,6 +965,7 @@  powi_as_mults_1 (gimple_stmt_iterator *gsi, locati
     }
 
   mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
+  gimple_set_location (mult_stmt, loc);
   gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
 
   return ssa_target;
@@ -999,6 +1000,7 @@  powi_as_mults (gimple_stmt_iterator *gsi, location
   div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target, 
 					   build_real (type, dconst1),
 					   result);
+  gimple_set_location (div_stmt, loc);
   gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
 
   return target;
@@ -1024,6 +1026,34 @@  gimple_expand_builtin_powi (gimple_stmt_iterator *
   return NULL_TREE;
 }
 
+/* Build a gimple call statement that calls FN with argument ARG.
+   Set the lhs of the call statement to a fresh SSA name for
+   variable VAR.  If VAR is NULL, first allocate it.  Insert the
+   statement prior to GSI's current position, and return the fresh
+   SSA name.  */
+
+static tree
+build_and_insert_call (gimple_stmt_iterator *gsi, tree fn, tree arg,
+		       tree *var, location_t loc)
+{
+  gimple call_stmt;
+  tree ssa_target;
+
+  if (!*var)
+    {
+      *var = create_tmp_var (TREE_TYPE (arg), "powroot");
+      add_referenced_var (*var);
+    }
+
+  call_stmt = gimple_build_call (fn, 1, arg);
+  ssa_target = make_ssa_name (*var, NULL);
+  gimple_set_lhs (call_stmt, ssa_target);
+  gimple_set_location (call_stmt, loc);
+  gsi_insert_before (gsi, call_stmt, GSI_SAME_STMT);
+
+  return ssa_target;
+}
+
 /* ARG0 and ARG1 are the two arguments to a pow builtin call in GSI
    with location info LOC.  If possible, create an equivalent and
    less expensive sequence of statements prior to GSI, and return an
@@ -1035,6 +1065,8 @@  gimple_expand_builtin_pow (gimple_stmt_iterator *g
 {
   REAL_VALUE_TYPE c, cint;
   HOST_WIDE_INT n;
+  tree type, sqrtfn, target = NULL_TREE;
+  enum machine_mode mode;
 
   /* If the exponent isn't a constant, there's nothing of interest
      to be done.  */
@@ -1054,6 +1086,108 @@  gimple_expand_builtin_pow (gimple_stmt_iterator *g
 	      && powi_cost (n) <= POWI_MAX_MULTS)))
     return gimple_expand_builtin_powi (gsi, loc, arg0, n);
 
+  /* Attempt various optimizations using sqrt and cbrt.  */
+  type = TREE_TYPE (arg0);
+  mode = TYPE_MODE (type);
+  sqrtfn = mathfn_built_in (type, BUILT_IN_SQRT);
+
+  if (flag_unsafe_math_optimizations && sqrtfn != NULL_TREE)
+    {
+      REAL_VALUE_TYPE dconst1_4, dconst3_4;
+      tree cbrtfn;
+      bool hw_sqrt_exists;
+
+      /* Optimize pow(x,0.5) = sqrt(x).  */
+      if (REAL_VALUES_EQUAL (c, dconsthalf))
+	return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+      /* Optimize pow(x,0.25) = sqrt(sqrt(x)).  */
+      dconst1_4 = dconst1;
+      SET_REAL_EXP (&dconst1_4, REAL_EXP (&dconst1_4) - 2);
+      hw_sqrt_exists = optab_handler(sqrt_optab, mode) != CODE_FOR_nothing;
+
+      if (REAL_VALUES_EQUAL (c, dconst1_4) && hw_sqrt_exists)
+	{
+	  tree sqrt_arg0;
+
+	  /* sqrt(x)  */
+	  sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+	  /* sqrt(sqrt(x))  */
+	  return build_and_insert_call (gsi, sqrtfn, sqrt_arg0,	&target, loc);
+	}
+      
+      /* Optimize pow(x,0.75) = sqrt(x) * sqrt(sqrt(x)).  */
+      real_from_integer (&dconst3_4, VOIDmode, 3, 0, 0);
+      SET_REAL_EXP (&dconst3_4, REAL_EXP (&dconst3_4) - 2);
+
+      if (optimize_function_for_speed_p (cfun)
+	  && !TREE_SIDE_EFFECTS (arg0) /* TODO: is this needed?  */
+	  && REAL_VALUES_EQUAL (c, dconst3_4)
+	  && hw_sqrt_exists)
+	{
+	  tree sqrt_arg0, sqrt_sqrt, ssa_target;
+	  gimple mult_stmt;
+
+	  /* sqrt(x)  */
+	  sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
+	  /* sqrt(sqrt(x))  */
+	  sqrt_sqrt = build_and_insert_call (gsi, sqrtfn, sqrt_arg0,
+					     &target, loc);
+	  /* sqrt(x) * sqrt(sqrt(x))  */
+	  ssa_target = make_ssa_name (target, NULL);
+	  mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target,
+						    sqrt_arg0, sqrt_sqrt);
+	  gimple_set_location (mult_stmt, loc);
+	  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
+
+	  return ssa_target;
+	}
+
+      /* Optimize pow(x,1./3.) = cbrt(x), and pow(x,1./6.) = cbrt(sqrt(x)).  */
+      cbrtfn = mathfn_built_in (type, BUILT_IN_CBRT);
+
+      if (cbrtfn != NULL_TREE)
+	{
+	  /* pow(x,1./3.) => cbrt(x).  */
+	  const REAL_VALUE_TYPE dconst1_3
+	    = real_value_truncate (mode, dconst_third ());
+
+	  if (REAL_VALUES_EQUAL (c, dconst1_3))
+	    return build_and_insert_call (gsi, cbrtfn, arg0, &target, loc);
+
+	  /* pow(x,1./6.) => cbrt(sqrt(x)).  */
+	  if (optimize_function_for_speed_p (cfun) && hw_sqrt_exists)
+	    {
+	      REAL_VALUE_TYPE dconst1_6 = dconst1_3;
+	      SET_REAL_EXP (&dconst1_6, REAL_EXP (&dconst1_6) - 1);
+
+	      if (REAL_VALUES_EQUAL (c, dconst1_6))
+		{
+		  tree sqrt_arg0;
+
+		  /* sqrt(x)  */
+		  sqrt_arg0 = build_and_insert_call (gsi, sqrtfn, arg0,
+						     &target, loc);
+		  /* cbrt(sqrt(x))  */
+		  return build_and_insert_call (gsi, cbrtfn, sqrt_arg0,
+						&target, loc);
+		}
+	    }
+	}
+    }
+
+  /* We allow one transformation when flag_unsafe_math_optimizations is
+     false.  Replacing pow(x,0.5) with sqrt(x) is safe, provided signed
+     zeros must not be maintained.  For x = -0, the former produces +0,
+     and the latter produces -0.  */
+  else if (sqrtfn != NULL_TREE
+	   && !HONOR_SIGNED_ZEROS (mode)
+	   && REAL_VALUES_EQUAL (c, dconsthalf))
+
+    return build_and_insert_call (gsi, sqrtfn, arg0, &target, loc);
+
   return NULL_TREE;
 }