===================================================================
@@ -455,6 +455,7 @@ extern struct gimple_opt_pass pass_tm_memopt;
extern struct gimple_opt_pass pass_tm_edges;
extern struct gimple_opt_pass pass_split_functions;
extern struct gimple_opt_pass pass_feedback_split_functions;
+extern struct gimple_opt_pass pass_strength_reduction;
/* IPA Passes */
extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
===================================================================
@@ -1,3 +1,26 @@
+2012-01-09 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * tree-pass.h: Declare pass_strength_reduction.
+ * tree-ssa-loop-ivopts.c (includes): Remove #undef add_cost.
+ (add_cost): Rename to add_regs_cost.
+ (multiply_regs_cost): New function.
+ (add_const_cost): Likewise.
+ (extend_or_trunc_cost): Likewise.
+ (negate_cost): Likewise.
+ (get_address_cost): Rename add_cost to add_regs_cost.
+ (force_expr_to_var_cost): Likewise.
+ (get_computation_cost_at): Likewise.
+ (determine_iv_cost): Likewise.
+ * timevar.def: Add TV_TREE_SLSR.
+ * tree-ssa-strength-reduction.c: New file.
+ * tree-flow.h (add_regs_cost): Declare.
+ (multiply_regs_cost): Likewise.
+ (add_const_cost): Likewise.
+ (extend_or_trunc_cost): Likewise.
+ (negate_cost): Likewise.
+ * Makefile.in: Add tree-ssa-strength-reduction.o deps.
+ * passes.c: Add strength reduction pass.
+
2012-01-06 Arnaud Charlet <charlet@adacore.com>
* c-decl.c (ext_block): Moved up.
===================================================================
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern void foo (int);
+
+void
+f (int *p, unsigned int n)
+{
+ foo (*(p + n * 4));
+ foo (*(p + 32 + n * 4));
+ if (n > 3)
+ foo (*(p + 16 + n * 4));
+ else
+ foo (*(p + 48 + n * 4));
+}
+
+/* { dg-final { scan-tree-dump-times "\\+ 128" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 64" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 192" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern void foo (int);
+
+void
+f (int *p, int n)
+{
+ foo (*(p + n++ * 4));
+ foo (*(p + 32 + n++ * 4));
+ foo (*(p + 16 + n * 4));
+}
+
+/* { dg-final { scan-tree-dump-times "\\+ 144" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 96" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,23 @@
+/* Verify straight-line strength reduction for simple integer addition
+ with stride reversed on 1st and 3rd instances. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = a1 + c;
+ a2 = 4 * s;
+ x2 = c + a2;
+ a3 = 6 * s;
+ x3 = a3 + c;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+foo (int a[], int b[], int i)
+{
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ return i;
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 4" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 8" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 12" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,24 @@
+/* Verify straight-line strength reduction for simple integer addition
+ with casts thrown in. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+long
+f (int s, long c)
+{
+ int a1, a2, a3;
+ long x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = c + a1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ a3 = 6 * s;
+ x3 = c + a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slsr -fdump-tree-optimized" } */
+
+void foo (int);
+
+int
+f (int i)
+{
+ int x, y;
+
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+
+ i = i + 5;
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+
+ i = i - 4;
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 10" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 20;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\- 16;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\- 160" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\* 10" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 40" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "slsr" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,21 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with stride in inconsistent positions. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int c, int s)
+{
+ int x1, x2, y1, y2;
+
+ y1 = c + 2;
+ x1 = y1 * s;
+ y2 = y1 + 2;
+ x2 = s * y2;
+ return x1 + x2;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* s" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 2" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,30 @@
+/* Verify that no straight-line strength reduction occurs across sibling
+ blocks. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ if (c > 0)
+ {
+ a1 = 2 * s;
+ x1 = c + a1;
+ }
+ else
+ {
+ a1 = 4 * s;
+ x1 = c + a1;
+ }
+
+ a2 = 6 * s;
+ x2 = c + a2;
+ x = x1 + x2;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 3 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for simple add candidates. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = c + a1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ a3 = 6 * s;
+ x3 = c + a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x + 3;
+ a += x1 * stride;
+ x2 = x1 + 3;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x + 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* x2 and x3 will be strength-reduced based on the same statement
+ but with different variables as the stride. Note that they will
+ be strength-reduced by introducing an initializer 4*s which is
+ cheaper than 5*s; similar for 4*c and 5*c. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a2, a3, x1, x2, x3, x;
+
+ x1 = c + s;
+ a2 = 5 * s;
+ x2 = c + a2;
+ a3 = 5 * c;
+ x3 = s + a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* 4" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 5" 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* Verify straight-line strength reduction for simple add candidates,
+ pointer version. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+void
+f (int s, char *c, char *x1, char *x2, char *x3)
+{
+ int a1, a2, a3;
+
+ a1 = 2 * s;
+ x1 = c + a1;
+ *x1 = 1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ *x2 = 2;
+ a3 = 6 * s;
+ x3 = c + a3;
+ *x3 = 3;
+}
+
+/* There will be four ' * ' instances for the parms, one in the code. */
+/* { dg-final { scan-tree-dump-times " \\* " 5 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x + 3;
+ a += x1 * stride;
+ x2 = x1 + 3;
+ a += x2 * stride;
+ x3 = x2 + 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Straight-line strength reduction control flow variation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a1, a2, x, x1, x2, x3, x4;
+
+ a1 = 2 * s;
+
+ if (n > 64)
+ {
+ x1 = c + a1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ x = x1 + x2;
+ }
+ else
+ {
+ x3 = c + a1;
+ a2 = 4 * s;
+ x4 = c + a2;
+ x = x4 / x3;
+ }
+
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for simple integer subtraction. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = c - a1;
+ a2 = 4 * s;
+ x2 = c - a2;
+ a3 = 6 * s;
+ x3 = c - a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+ x1 = x + 3;
+ a += x1 * stride;
+
+ if (n > 64)
+ {
+ x2 = x1 + 3;
+ a += x2 * stride;
+ x3 = x2 + 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,27 @@
+/* Straight-line strength reduction control flow variation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a, x1, x2, x3;
+
+ x1 = x2 = x3 = c;
+
+ if (n > 64)
+ {
+ a = 2 * s;
+ x1 = c + a;
+ a = 4 * s;
+ x2 = c + a;
+ a = 6 * s;
+ x3 = c + a;
+ }
+
+ return x1 + x2 + x3;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for simple pointer subtraction. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int*
+f (int s, int *c)
+{
+ int a1, a2, a3, *x1, *x2, *x3;
+
+ a1 = 2 * s;
+ x1 = c - a1;
+ a2 = 4 * s;
+ x2 = c - a2;
+ a3 = 6 * s;
+ x3 = c - a3;
+ return x1 ? x2 : x3;
+}
+
+/* There are 3 ' * ' instances in the decls, 1 parm, 2 in the code. */
+/* { dg-final { scan-tree-dump-times " \\* " 6 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,31 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow, increment = 1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x + 1;
+ a += x1 * stride;
+ x2 = x1 + 1;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x + 1;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,28 @@
+/* Straight-line strength reduction control flow variation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a2, a3, a4, x1, x2, x3, x4;
+
+ x1 = c + s;
+ a2 = 3 * s;
+ x2 = c + a2;
+ x3 = x4 = c;
+
+ if (n > 64)
+ {
+ a3 = 5 * s;
+ x3 = c + a3;
+ a4 = 7 * s;
+ x4 = c + a4;
+ }
+
+ return x1 + x2 + x3 + x4;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,23 @@
+/* Verify straight-line strength reduction for simple integer addition
+ with stride reversed. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = a1 + c;
+ a2 = 4 * s;
+ x2 = a2 + c;
+ a3 = 6 * s;
+ x3 = a3 + c;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,31 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow, increment = -1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x - 1;
+ a += x1 * stride;
+ x2 = x1 - 1;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x - 1;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,31 @@
+/* Straight-line strength reduction control flow variation with incr = 1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a2, a3, a4, x1, x2, x3, x4;
+
+ x1 = c + s;
+ x2 = x3 = x4 = c;
+
+ if (n > 64)
+ {
+ a2 = 2 * s;
+ x2 = c + a2;
+ a3 = 3 * s;
+ x3 = c + a3;
+ }
+ else
+ {
+ a4 = 2 * s;
+ x4 = c + a4;
+ }
+
+ return x1 + x2 + x3 + x4;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow, increment = -3. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x - 3;
+ a += x1 * stride;
+ x2 = x1 - 3;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x - 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* -3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Straight-line strength reduction control flow variation with incr = -1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a1, a2, a3, a4, x1, x2, x3, x4;
+
+ a1 = 4 * s;
+ x1 = c + a1;
+ x2 = x3 = x4 = c;
+
+ if (n > 64)
+ {
+ a2 = 3 * s;
+ x2 = c + a2;
+ a3 = 2 * s;
+ x3 = c + a3;
+ }
+ else
+ {
+ a4 = 3 * s;
+ x4 = c + a4;
+ }
+
+ return x1 + x2 + x3 + x4;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with stride in RHS1 position. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int c, int s)
+{
+ int x1, x2, y1, y2;
+
+ y1 = c + 2;
+ x1 = s * y1;
+ y2 = y1 + 2;
+ x2 = s * y2;
+ return x1 + x2;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* y" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 2" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
+
===================================================================
@@ -92,10 +92,9 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
-/* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses.
+/* FIXME: zero_cost defined in expmed.h conflicts with local uses.
*/
#include "expmed.h"
-#undef add_cost
#undef zero_cost
/* FIXME: Expressions are expanded to RTL in this pass to determine the
@@ -3051,8 +3050,8 @@ adjust_setup_cost (struct ivopts_data *data, unsig
/* Returns cost of addition in MODE. */
-static unsigned
-add_cost (enum machine_mode mode, bool speed)
+unsigned
+add_regs_cost (enum machine_mode mode, bool speed)
{
static unsigned costs[NUM_MACHINE_MODES];
rtx seq;
@@ -3081,6 +3080,148 @@ adjust_setup_cost (struct ivopts_data *data, unsig
return cost;
}
+/* Returns cost of multiplication in MODE. */
+
+unsigned
+multiply_regs_cost (enum machine_mode mode, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+
+ if (costs[mode])
+ return costs[mode];
+
+ start_sequence ();
+ force_operand (gen_rtx_fmt_ee (MULT, mode,
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
+ NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ costs[mode] = cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Multiplication in %s costs %d\n",
+ GET_MODE_NAME (mode), cost);
+ return cost;
+}
+
+/* Returns cost of addition with a constant in MODE. */
+
+unsigned
+add_const_cost (enum machine_mode mode, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+
+ if (costs[mode])
+ return costs[mode];
+
+ /* Arbitrarily generate insns for x + 2, as the exact constant
+ shouldn't matter. */
+ start_sequence ();
+ force_operand (gen_rtx_fmt_ee (PLUS, mode,
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+ gen_int_mode (2, mode)),
+ NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ costs[mode] = cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Addition to constant in %s costs %d\n",
+ GET_MODE_NAME (mode), cost);
+ return cost;
+}
+
+/* Returns cost of extend or truncate in MODE. */
+
+unsigned
+extend_or_trunc_cost (tree type_to, tree type_from, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+ enum machine_mode mode_to = TYPE_MODE (type_to);
+ enum machine_mode mode_from = TYPE_MODE (type_from);
+ tree size_to = TYPE_SIZE (type_to);
+ tree size_from = TYPE_SIZE (type_from);
+ enum rtx_code code;
+
+ gcc_assert (TREE_CODE (size_to) == INTEGER_CST
+ && TREE_CODE (size_from) == INTEGER_CST);
+
+ if (costs[mode_to][mode_from])
+ return costs[mode_to][mode_from];
+
+ if (tree_int_cst_lt (size_to, size_from))
+ code = TRUNCATE;
+ else if (TYPE_UNSIGNED (type_to))
+ code = ZERO_EXTEND;
+ else
+ code = SIGN_EXTEND;
+
+  start_sequence ();
+  force_operand (gen_rtx_fmt_e (code, mode_to,
+				gen_raw_REG (mode_from,
+					     LAST_VIRTUAL_REGISTER + 1)),
+		 NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Conversion from %s to %s costs %d\n",
+	     GET_MODE_NAME (mode_from), GET_MODE_NAME (mode_to), cost);
+
+ costs[mode_to][mode_from] = cost;
+ return cost;
+}
+
+/* Returns cost of negation in MODE. */
+
+unsigned
+negate_cost (enum machine_mode mode, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+
+ if (costs[mode])
+ return costs[mode];
+
+ start_sequence ();
+ force_operand (gen_rtx_fmt_e (NEG, mode,
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
+ NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ costs[mode] = cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Negation in %s costs %d\n",
+ GET_MODE_NAME (mode), cost);
+ return cost;
+}
+
/* Entry in a hashtable of already known costs for multiplication. */
struct mbc_entry
{
@@ -3408,7 +3549,7 @@ get_address_cost (bool symbol_present, bool var_pr
If VAR_PRESENT is true, try whether the mode with
SYMBOL_PRESENT = false is cheaper even with cost of addition, and
if this is the case, use it. */
- add_c = add_cost (address_mode, speed);
+ add_c = add_regs_cost (address_mode, speed);
for (i = 0; i < 8; i++)
{
var_p = i & 1;
@@ -3492,7 +3633,7 @@ get_address_cost (bool symbol_present, bool var_pr
cost += multiply_by_cost (ratio, address_mode, speed);
if (s_offset && !offset_p && !symbol_present)
- cost += add_cost (address_mode, speed);
+ cost += add_regs_cost (address_mode, speed);
if (may_autoinc)
*may_autoinc = autoinc;
@@ -3659,7 +3800,7 @@ force_expr_to_var_cost (tree expr, bool speed)
case PLUS_EXPR:
case MINUS_EXPR:
case NEGATE_EXPR:
- cost = new_cost (add_cost (mode, speed), 0);
+ cost = new_cost (add_regs_cost (mode, speed), 0);
if (TREE_CODE (expr) != NEGATE_EXPR)
{
tree mult = NULL_TREE;
@@ -4154,7 +4295,7 @@ get_computation_cost_at (struct ivopts_data *data,
&symbol_present, &var_present,
&offset, depends_on));
cost.cost /= avg_loop_niter (data->current_loop);
- cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
+ cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
}
if (inv_expr_id)
@@ -4195,14 +4336,14 @@ get_computation_cost_at (struct ivopts_data *data,
are added once to the variable, if present. */
if (var_present && (symbol_present || offset))
cost.cost += adjust_setup_cost (data,
- add_cost (TYPE_MODE (ctype), speed));
+ add_regs_cost (TYPE_MODE (ctype), speed));
/* Having offset does not affect runtime cost in case it is added to
symbol, but it increases complexity. */
if (offset)
cost.complexity++;
- cost.cost += add_cost (TYPE_MODE (ctype), speed);
+ cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);
aratio = ratio > 0 ? ratio : -ratio;
if (aratio != 1)
@@ -5052,7 +5193,7 @@ determine_iv_cost (struct ivopts_data *data, struc
or a const set. */
if (cost_base.cost == 0)
cost_base.cost = COSTS_N_INSNS (1);
- cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
+ cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
cost = cost_step + adjust_setup_cost (data, cost_base.cost);
===================================================================
@@ -253,6 +253,7 @@ DEFTIMEVAR (TV_TREE_IFCOMBINE , "tree if-co
DEFTIMEVAR (TV_TREE_UNINIT , "uninit var analysis")
DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization")
DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution")
+DEFTIMEVAR (TV_TREE_SLSR , "straight-line strength reduction")
/* Everything else in rest_of_compilation not included above. */
DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes")
===================================================================
@@ -0,0 +1,2355 @@
+/* Straight-line strength reduction.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* There are many algorithms for performing strength reduction on
+ loops. This is not one of them. IVOPTS handles strength reduction
+ of induction variables just fine. This pass is intended to pick
+ up the crumbs it leaves behind, by considering opportunities for
+ strength reduction along dominator paths.
+
+ Strength reduction will be implemented in four stages, gradually
+ adding more complex candidates:
+
+ 1) Explicit multiplies, known constant multipliers, no
+ conditional increments. (complete)
+ 2) Explicit multiplies, unknown constant multipliers,
+ no conditional increments. (complete)
+ 3) Explicit multiplies, conditional increments.
+ 4) Implicit multiplies in addressing expressions.
+
+ It would also be possible to apply strength reduction to divisions
+ and modulos, but such opportunities are relatively uncommon.
+
+ Strength reduction is also currently restricted to integer operations.
+ If desired, it could be extended to floating-point operations under
+ control of something like -funsafe-math-optimizations. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "gimple.h"
+#include "basic-block.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "alloc-pool.h"
+#include "tree-flow.h"
+#include "domwalk.h"
+
+/* Information about a strength reduction candidate. Each statement
+ in the candidate represents an expression of one of the following
+ forms:
+
+ (CAND_MULT) S1: X = (B + i) * S
+ (CAND_ADD) S1: X = B + (i * S)
+
+ Here X and B are SSA names, i is an integer constant, and S is
+ either an SSA name or a constant. We call B the "base," i the
+ "index", and S the "stride."
+
+ Any statement S0 that dominates S1 and is of the form:
+
+ (CAND_MULT) S0: Y = (B + i') * S
+ (CAND_ADD) S0: Y = B + (i' * S)
+
+ is called a "basis" for S1. In both cases, S1 may be replaced by
+
+ S1': X = Y + (i - i') * S,
+
+ where (i - i') * S is folded to the extent possible.
+
+ All gimple statements are visited in dominator order, and each
+ statement that may contribute to one of the forms of S1 above is
+ given at least one entry in the candidate table. Such statements
+ include addition, pointer addition, subtraction, multiplication,
+ negation, copies, and nontrivial type casts. If a statement may
+ represent more than one expression of the forms of S1 above,
+ multiple "interpretations" are stored in the table and chained
+ together. Examples:
+
+ * An add of two SSA names may treat either operand as the base.
+ * A multiply of two SSA names, likewise.
+ * A copy or cast may be thought of as either a CAND_MULT with
+ i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
+
+ Candidate records are allocated from an allocation pool. They are
+ addressed both from a hash table keyed on S1, and from a vector of
+ candidate pointers arranged in predominator order.
+
+ Opportunity note
+ ----------------
+ Currently we don't recognize:
+
+ S0: Y = (S * i') - B
+ S1: X = (S * i) - B
+
+ as a strength reduction opportunity, even though this S1 would
+ also be replaceable by the S1' above. This can be added if it
+ comes up in practice. */
+
+/* Index into the candidate vector, offset by 1. VECs are zero-based,
+ while cand_idx's are one-based, with zero indicating null. */
+
+typedef unsigned cand_idx;
+
+/* The kind of candidate: whether the defining statement is viewed as
+ a multiply, X = (B + i) * S (CAND_MULT), or as an add,
+ X = B + (i * S) (CAND_ADD). */
+
+enum cand_kind
+{
+ CAND_MULT,
+ CAND_ADD
+};
+
+/* One interpretation of a candidate statement, in one of the forms
+ described in the module commentary above. */
+struct slsr_cand_d
+{
+ /* The candidate statement S1. */
+ gimple cand_stmt;
+
+ /* The base SSA name B. */
+ tree base_name;
+
+ /* The stride S. */
+ tree stride;
+
+ /* The index constant i. */
+ double_int index;
+
+ /* The type of the candidate. This is normally the type of base_name,
+ but casts may have occurred when combining feeding instructions.
+ A candidate can only be a basis for candidates of the same final type. */
+ tree cand_type;
+
+ /* The kind of candidate (CAND_MULT, etc.). */
+ enum cand_kind kind;
+
+ /* Index of this candidate in the candidate vector. */
+ cand_idx cand_num;
+
+ /* Index of the next candidate record for the same statement.
+ A statement may be useful in more than one way (e.g., due to
+ commutativity). So we can have multiple "interpretations"
+ of a statement. */
+ cand_idx next_interp;
+
+ /* Index of the basis statement S0, if any, in the candidate vector. */
+ cand_idx basis;
+
+ /* First candidate for which this candidate is a basis, if one exists. */
+ cand_idx dependent;
+
+ /* Next candidate having the same basis as this one. */
+ cand_idx sibling;
+
+ /* If this is a conditional candidate, the defining PHI statement
+ for the base SSA name B. For future use; always NULL for now. */
+ gimple def_phi;
+
+ /* Access to the statement for subsequent modification. Cached to
+ save compile time. */
+ gimple_stmt_iterator cand_gsi;
+
+ /* Savings that can be expected from eliminating dead code if this
+ candidate is replaced. */
+ int dead_savings;
+};
+
+typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
+typedef const struct slsr_cand_d *const_slsr_cand_t;
+
+/* Pointers to candidates are chained together as part of a mapping
+ from SSA names to the candidates that use them as a base name. */
+
+struct cand_chain_d
+{
+ /* SSA name that serves as a base name for the chain of candidates. */
+ tree base_name;
+
+ /* Pointer to a candidate that uses BASE_NAME as its base. */
+ slsr_cand_t cand;
+
+ /* Next record in the chain for the same base name, or NULL. */
+ struct cand_chain_d *next;
+
+};
+
+typedef struct cand_chain_d cand_chain, *cand_chain_t;
+typedef const struct cand_chain_d *const_cand_chain_t;
+
+/* Information about a unique "increment" associated with candidates
+ having an SSA name for a stride. An increment is the difference
+ between the index of the candidate and the index of its basis,
+ i.e., (i - i') as discussed in the module commentary. */
+
+struct incr_info_d
+{
+ /* The increment that relates a candidate to its basis. */
+ double_int incr;
+
+ /* How many times the increment occurs in the candidate tree. */
+ unsigned count;
+
+ /* Cost of replacing candidates using this increment. Negative and
+ zero costs indicate replacement should be performed. */
+ int cost;
+
+ /* If this increment is profitable but is not -1, 0, or 1, it requires
+ an initializer T_0 = stride * incr to be found or introduced in the
+ nearest common dominator of all candidates. This field holds T_0
+ for subsequent use. */
+ tree initializer;
+
+ /* If the initializer was found to already exist, this is the block
+ where it was found; otherwise NULL. */
+ basic_block init_bb;
+
+};
+
+typedef struct incr_info_d incr_info, *incr_info_t;
+
+/* Candidates are maintained in a vector. If candidate X dominates
+ candidate Y, then X appears before Y in the vector; but the
+ converse does not necessarily hold. */
+DEF_VEC_P (slsr_cand_t);
+DEF_VEC_ALLOC_P (slsr_cand_t, heap);
+static VEC (slsr_cand_t, heap) *cand_vec;
+
+/* Symbolic cost values used in profitability analysis. COST_INFINITE
+ marks a replacement that must never be made. */
+enum cost_consts
+{
+ COST_NEUTRAL = 0,
+ COST_INFINITE = 1000
+};
+
+/* Hash table embodying a mapping from statements to candidates. */
+static htab_t stmt_cand_map;
+
+/* Allocation pool for candidates. */
+static alloc_pool cand_pool;
+
+/* Hash table embodying a mapping from base names to chains of candidates. */
+static htab_t base_cand_map;
+
+/* Allocation pool for candidate chains. */
+static alloc_pool chain_pool;
+
+/* An array INCR_VEC of incr_infos is used during analysis of related
+ candidates having an SSA name for a stride. INCR_VEC_LEN describes
+ its current length. */
+static incr_info_t incr_vec;
+static unsigned incr_vec_len;
+
+/* Produce a pointer to the IDX'th candidate in the candidate vector.
+ IDX is one-based (zero means "no candidate"), so the zero-based
+ vector is indexed at IDX - 1. */
+
+static slsr_cand_t
+lookup_cand (cand_idx idx)
+{
+ return VEC_index (slsr_cand_t, cand_vec, idx - 1);
+}
+
+/* Callback to produce a hash value for a candidate. Hashes on the
+ pointer identity of the candidate's statement, matching the
+ pointer comparison performed by stmt_cand_eq. */
+
+static hashval_t
+stmt_cand_hash (const void *p)
+{
+ return htab_hash_pointer (((const_slsr_cand_t) p)->cand_stmt);
+}
+
+/* Callback when an element is removed from the hash table.
+ We never remove entries until the entire table is released,
+ and candidates live in CAND_POOL, so nothing to do here. */
+
+static void
+stmt_cand_free (void *p ATTRIBUTE_UNUSED)
+{
+}
+
+/* Callback to return true if two candidates are equal. Two candidate
+ records are considered equal iff they describe the same statement. */
+
+static int
+stmt_cand_eq (const void *p1, const void *p2)
+{
+ const_slsr_cand_t const cand1 = (const_slsr_cand_t) p1;
+ const_slsr_cand_t const cand2 = (const_slsr_cand_t) p2;
+ return (cand1->cand_stmt == cand2->cand_stmt);
+}
+
+/* Callback to produce a hash value for a candidate chain header.
+ The SSA name version is a ready-made hash; non-SSA base names all
+ collide into bucket 0, where base_cand_eq disambiguates them. */
+
+static hashval_t
+base_cand_hash (const void *p)
+{
+ tree ssa_name = ((const_cand_chain_t) p)->base_name;
+
+ if (TREE_CODE (ssa_name) != SSA_NAME)
+ return (hashval_t) 0;
+
+ return (hashval_t) SSA_NAME_VERSION (ssa_name);
+}
+
+/* Callback when an element is removed from the hash table.
+ We never remove entries until the entire table is released,
+ and chain nodes live in CHAIN_POOL, so nothing to do here. */
+
+static void
+base_cand_free (void *p ATTRIBUTE_UNUSED)
+{
+}
+
+/* Callback to return true if two candidate chain headers are equal,
+ i.e., if they are keyed on the same base name. */
+
+static int
+base_cand_eq (const void *p1, const void *p2)
+{
+ const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
+ const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
+ return operand_equal_p (chain1->base_name, chain2->base_name, 0);
+}
+
+/* Use the base name from candidate C to look for possible candidates
+ that can serve as a basis for C. Each potential basis must also
+ appear in a block that dominates the candidate statement and have
+ the same stride and type. If more than one possible basis exists,
+ the one with highest index in the vector is chosen; this will be
+ the most immediately dominating basis. Return the candidate number
+ of the chosen basis, or 0 if none is found; on success, C is also
+ linked onto the basis's list of dependents. */
+
+static int
+find_basis_for_candidate (slsr_cand_t c)
+{
+ cand_chain mapping_key;
+ cand_chain_t chain;
+ slsr_cand_t basis = NULL;
+
+ mapping_key.base_name = c->base_name;
+ chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
+
+ for (; chain; chain = chain->next)
+ {
+ slsr_cand_t one_basis = chain->cand;
+
+ if (one_basis->kind != c->kind
+ || !operand_equal_p (one_basis->stride, c->stride, 0)
+ || !types_compatible_p (one_basis->cand_type, c->cand_type)
+ || !dominated_by_p (CDI_DOMINATORS,
+ gimple_bb (c->cand_stmt),
+ gimple_bb (one_basis->cand_stmt)))
+ continue;
+
+ /* Prefer the candidate added latest; it is the nearest dominator. */
+ if (!basis || basis->cand_num < one_basis->cand_num)
+ basis = one_basis;
+ }
+
+ if (basis)
+ {
+ /* Link C at the head of the basis's dependent list. */
+ c->sibling = basis->dependent;
+ basis->dependent = c->cand_num;
+ return basis->cand_num;
+ }
+
+ return 0;
+}
+
+/* Record a mapping from the base name of C to C itself, indicating that
+ C may potentially serve as a basis using that base name. */
+
+static void
+record_potential_basis (slsr_cand_t c)
+{
+ void **slot;
+ cand_chain_t node;
+
+ node = (cand_chain_t) pool_alloc (chain_pool);
+ node->base_name = c->base_name;
+ node->cand = c;
+ node->next = NULL;
+ slot = htab_find_slot (base_cand_map, node, INSERT);
+
+ if (*slot)
+ {
+ /* Insert NODE immediately after the existing head so the head
+ pointer stored in the hash table never needs to change. */
+ cand_chain_t head = (cand_chain_t) (*slot);
+ node->next = head->next;
+ head->next = node;
+ }
+ else
+ *slot = node;
+}
+
+/* Allocate storage for a new candidate and initialize its fields
+ from KIND, GS, BASE, INDEX, STRIDE, CTYPE, the estimated dead-code
+ SAVINGS, and the statement iterator GSI. Attempt to find a basis
+ for the candidate, and register it as a potential basis for later
+ candidates. Return the new candidate. */
+
+static slsr_cand_t
+alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
+ double_int index, tree stride, tree ctype,
+ unsigned savings, gimple_stmt_iterator gsi)
+{
+ slsr_cand_t c = (slsr_cand_t) pool_alloc (cand_pool);
+ c->cand_stmt = gs;
+ c->base_name = base;
+ c->stride = stride;
+ c->index = index;
+ c->cand_type = ctype;
+ c->kind = kind;
+ /* Candidate numbers are one-based; see lookup_cand. */
+ c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1;
+ c->next_interp = 0;
+ c->dependent = 0;
+ c->sibling = 0;
+ c->def_phi = NULL;
+ c->cand_gsi = gsi;
+ c->dead_savings = savings;
+
+ VEC_safe_push (slsr_cand_t, heap, cand_vec, c);
+ c->basis = find_basis_for_candidate (c);
+ record_potential_basis (c);
+
+ return c;
+}
+
+/* Determine the target cost of statement GS when compiling according
+ to SPEED (true for speed, false for size). GS must be an assignment
+ whose RHS code is one of the operations recognized as candidates;
+ anything else trips the gcc_unreachable below. */
+
+static int
+stmt_cost (gimple gs, bool speed)
+{
+ tree lhs, rhs1, rhs2;
+ enum machine_mode lhs_mode;
+
+ gcc_assert (is_gimple_assign (gs));
+ lhs = gimple_assign_lhs (gs);
+ rhs1 = gimple_assign_rhs1 (gs);
+ lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
+
+ switch (gimple_assign_rhs_code (gs))
+ {
+ case MULT_EXPR:
+ rhs2 = gimple_assign_rhs2 (gs);
+
+ /* Multiply by a constant may be cheaper than a general multiply
+ (shift-add sequences), so cost those separately. */
+ if (TREE_CODE (rhs2) == INTEGER_CST)
+ return multiply_by_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
+
+ if (TREE_CODE (rhs1) == INTEGER_CST)
+ return multiply_by_cost (TREE_INT_CST_LOW (rhs1), lhs_mode, speed);
+
+ return multiply_regs_cost (TYPE_MODE (TREE_TYPE (lhs)), speed);
+
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ case MINUS_EXPR:
+ rhs2 = gimple_assign_rhs2 (gs);
+
+ if (TREE_CODE (rhs2) == INTEGER_CST)
+ return add_const_cost (TYPE_MODE (TREE_TYPE (rhs1)), speed);
+
+ if (TREE_CODE (rhs1) == INTEGER_CST)
+ return add_const_cost (TYPE_MODE (TREE_TYPE (rhs2)), speed);
+
+ return add_regs_cost (lhs_mode, speed);
+
+ case NEGATE_EXPR:
+ return negate_cost (lhs_mode, speed);
+
+ case NOP_EXPR:
+ return extend_or_trunc_cost (TREE_TYPE (lhs), TREE_TYPE (rhs1), speed);
+
+ case MODIFY_EXPR:
+ /* Be suspicious of assigning costs to copies that may well go away. */
+ return 0;
+
+ default:
+ ;
+ }
+
+ gcc_unreachable ();
+ return 0;
+}
+
+/* Look up the defining statement for BASE_IN and return a pointer
+ to its candidate in the candidate table, if any; otherwise NULL.
+ When a candidate is found, it is the first of possibly several
+ interpretations chained through next_interp. */
+
+static slsr_cand_t
+base_cand_from_table (tree base_in)
+{
+ slsr_cand mapping_key;
+
+ gimple def = SSA_NAME_DEF_STMT (base_in);
+ if (!def)
+ return (slsr_cand_t) NULL;
+
+ mapping_key.cand_stmt = def;
+ return (slsr_cand_t) htab_find (stmt_cand_map, &mapping_key);
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ multiplies two SSA names BASE_IN and STRIDE_IN. Propagate any
+ known information about the two SSA names into the new candidate.
+ SPEED selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in,
+ gimple_stmt_iterator gsi, bool speed)
+{
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
+ double_int index;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+
+ /* Look at all interpretations of the base candidate, if necessary,
+ to find information to propagate into this candidate. The loop
+ stops at the first interpretation that provides a base. */
+ while (base_cand && !base)
+ {
+
+ if (base_cand->kind == CAND_MULT
+ && operand_equal_p (base_cand->stride, integer_one_node, 0))
+ {
+ /* Y = (B + i') * 1
+ X = Y * Z
+ ================
+ X = (B + i') * Z */
+ base = base_cand->base_name;
+ index = base_cand->index;
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ /* If the feeding statement will become dead after replacement,
+ credit its cost as additional savings. */
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (base_cand->kind == CAND_ADD
+ && TREE_CODE (base_cand->stride) == INTEGER_CST)
+ {
+ /* Y = B + (i' * S), S constant
+ X = Y * Z
+ ============================
+ X = B + ((i' * S) * Z) */
+ base = base_cand->base_name;
+ index = double_int_mul (base_cand->index,
+ tree_to_double_int (base_cand->stride));
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = (Y + 0) * Z. */
+ base = base_in;
+ index = double_int_zero;
+ stride = stride_in;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ multiplies SSA name BASE_IN by constant STRIDE_IN. Propagate any
+ known information about BASE_IN into the new candidate. SPEED
+ selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_mul_imm_cand (gimple gs, tree base_in, tree stride_in,
+ gimple_stmt_iterator gsi, bool speed)
+{
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
+ double_int index, temp;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+
+ /* Look at all interpretations of the base candidate, if necessary,
+ to find information to propagate into this candidate. The loop
+ stops at the first interpretation that provides a base. */
+ while (base_cand && !base)
+ {
+ if (base_cand->kind == CAND_MULT
+ && TREE_CODE (base_cand->stride) == INTEGER_CST)
+ {
+ /* Y = (B + i') * S, S constant
+ X = Y * c
+ ============================
+ X = (B + i') * (S * c) */
+ base = base_cand->base_name;
+ index = base_cand->index;
+ temp = double_int_mul (tree_to_double_int (base_cand->stride),
+ tree_to_double_int (stride_in));
+ stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
+ ctype = base_cand->cand_type;
+ /* If the feeding statement will become dead after replacement,
+ credit its cost as additional savings. */
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (base_cand->kind == CAND_ADD
+ && operand_equal_p (base_cand->stride, integer_one_node, 0))
+ {
+ /* Y = B + (i' * 1)
+ X = Y * c
+ ===========================
+ X = (B + i') * c */
+ base = base_cand->base_name;
+ index = base_cand->index;
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (base_cand->kind == CAND_ADD
+ && double_int_one_p (base_cand->index)
+ && TREE_CODE (base_cand->stride) == INTEGER_CST)
+ {
+ /* Y = B + (1 * S), S constant
+ X = Y * c
+ ===========================
+ X = (B + S) * c */
+ base = base_cand->base_name;
+ index = tree_to_double_int (base_cand->stride);
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = (Y + 0) * c. */
+ base = base_in;
+ index = double_int_zero;
+ stride = stride_in;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Given GS which is a multiply of scalar integers, make an appropriate
+ entry in the candidate table. If this is a multiply of two SSA names,
+ create two CAND_MULT interpretations and attempt to find a basis for
+ each of them. Otherwise, create a single CAND_MULT and attempt to
+ find a basis. GSI locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_mul (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ tree rhs2 = gimple_assign_rhs2 (gs);
+ slsr_cand_t c, c2;
+ void **slot;
+
+ if (TREE_CODE (rhs2) != INTEGER_CST && TREE_CODE (rhs2) != SSA_NAME)
+ return;
+
+ /* If this is a multiply of an SSA name with itself, it is highly
+ unlikely that we will get a strength reduction opportunity, so
+ don't record it as a candidate. This simplifies the logic for
+ finding a basis, so if this is removed that must be considered. */
+ if (TREE_CODE (rhs2) == SSA_NAME && operand_equal_p (rhs1, rhs2, 0))
+ return;
+
+ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME)
+ {
+ /* Record an interpretation of this statement in the candidate table
+ assuming RHS1 is the base name and RHS2 is the stride. */
+ c = create_mul_ssa_cand (gs, rhs1, rhs2, gsi, speed);
+
+ /* Add the first interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+
+ /* Record another interpretation of this statement assuming RHS1
+ is the stride and RHS2 is the base name. */
+ c2 = create_mul_ssa_cand (gs, rhs2, rhs1, gsi, speed);
+ c->next_interp = c2->cand_num;
+ }
+ else
+ {
+ /* Canonicalize the multiply-immediate case so the SSA name is
+ always RHS1 (multiplication is commutative). */
+ if (TREE_CODE (rhs1) == INTEGER_CST)
+ {
+ tree swapper = rhs1;
+ rhs1 = rhs2;
+ rhs2 = swapper;
+ }
+
+ if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST)
+ return;
+
+ /* Record an interpretation for the multiply-immediate. */
+ c = create_mul_imm_cand (gs, rhs1, rhs2, gsi, speed);
+
+ /* Add the interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+ }
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ adds two SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false,
+ and subtracts ADDEND_IN from BASE_IN otherwise. Propagate any
+ known information about the two SSA names into the new candidate.
+ SPEED selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
+ bool subtract_p, gimple_stmt_iterator gsi, bool speed)
+{
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
+ double_int index;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+ slsr_cand_t addend_cand = base_cand_from_table (addend_in);
+
+ /* The most useful transformation is a multiply-immediate feeding
+ an add or subtract. Look for that first. */
+ while (addend_cand && !base)
+ {
+ if (addend_cand->kind == CAND_MULT
+ && double_int_zero_p (addend_cand->index)
+ && TREE_CODE (addend_cand->stride) == INTEGER_CST)
+ {
+ /* Z = (B + 0) * S, S constant
+ X = Y +/- Z
+ ===========================
+ X = Y + ((+/-1 * S) * B) */
+ base = base_in;
+ index = tree_to_double_int (addend_cand->stride);
+ if (subtract_p)
+ index = double_int_neg (index);
+ stride = addend_cand->base_name;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ if (has_single_use (addend_in))
+ savings = (addend_cand->dead_savings
+ + stmt_cost (addend_cand->cand_stmt, speed));
+ }
+
+ if (addend_cand->next_interp)
+ addend_cand = lookup_cand (addend_cand->next_interp);
+ else
+ addend_cand = NULL;
+ }
+
+ while (base_cand && !base)
+ {
+ if (base_cand->kind == CAND_ADD
+ && (double_int_zero_p (base_cand->index)
+ || operand_equal_p (base_cand->stride,
+ integer_zero_node, 0)))
+ {
+ /* Y = B + (i' * S), i' * S = 0
+ X = Y +/- Z
+ ============================
+ X = B + (+/-1 * Z) */
+ base = base_cand->base_name;
+ index = subtract_p ? double_int_minus_one : double_int_one;
+ stride = addend_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (subtract_p)
+ {
+ /* NOTE(review): this inner walk over the interpretations of
+ ADDEND_IN appears to mirror the addend_cand loop above for
+ the subtract case — confirm whether it can yield a different
+ result when the outer loop reaches this branch. */
+ slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
+
+ while (subtrahend_cand && !base)
+ {
+ if (subtrahend_cand->kind == CAND_MULT
+ && double_int_zero_p (subtrahend_cand->index)
+ && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
+ {
+ /* Z = (B + 0) * S, S constant
+ X = Y - Z
+ ===========================
+ Value: X = Y + ((-1 * S) * B) */
+ base = base_in;
+ index = tree_to_double_int (subtrahend_cand->stride);
+ index = double_int_neg (index);
+ stride = subtrahend_cand->base_name;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ if (has_single_use (addend_in))
+ savings = (subtrahend_cand->dead_savings
+ + stmt_cost (subtrahend_cand->cand_stmt, speed));
+ }
+
+ if (subtrahend_cand->next_interp)
+ subtrahend_cand = lookup_cand (subtrahend_cand->next_interp)
+ else
+ subtrahend_cand = NULL;
+ }
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = Y + (1 * Z). */
+ base = base_in;
+ index = subtract_p ? double_int_minus_one : double_int_one;
+ stride = addend_in;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Return TRUE iff PRODUCT is an integral multiple of FACTOR, and return
+ the multiple in *MULTIPLE. Otherwise return FALSE and leave *MULTIPLE
+ unchanged. UNSIGNED_P selects unsigned division semantics. */
+/* ??? - Should this be moved to double-int.c? */
+
+static bool
+double_int_multiple_of (double_int product, double_int factor,
+ bool unsigned_p, double_int *multiple)
+{
+ double_int remainder;
+ /* Truncating division; PRODUCT is a multiple iff the remainder is 0. */
+ double_int quotient = double_int_divmod (product, factor, unsigned_p,
+ TRUNC_DIV_EXPR, &remainder);
+ if (double_int_zero_p (remainder))
+ {
+ *multiple = quotient;
+ return true;
+ }
+
+ return false;
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ adds SSA name BASE_IN to constant INDEX_IN. Propagate any
+ known information about BASE_IN into the new candidate. SPEED
+ selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_add_imm_cand (gimple gs, tree base_in, double_int index_in,
+ gimple_stmt_iterator gsi, bool speed)
+{
+ enum cand_kind kind = CAND_ADD;
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
+ double_int index, multiple;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+
+ /* Stop at the first interpretation whose constant stride evenly
+ divides the added constant. */
+ while (base_cand && !base)
+ {
+ bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
+
+ if (TREE_CODE (base_cand->stride) == INTEGER_CST
+ && double_int_multiple_of (index_in,
+ tree_to_double_int (base_cand->stride),
+ unsigned_p,
+ &multiple))
+ {
+ /* Y = (B + i') * S, S constant, c = kS for some integer k
+ X = Y + c
+ ============================
+ X = (B + (i'+ k)) * S
+ OR
+ Y = B + (i' * S), S constant, c = kS for some integer k
+ X = Y + c
+ ============================
+ X = (B + (i'+ k)) * S */
+ kind = base_cand->kind;
+ base = base_cand->base_name;
+ index = double_int_add (base_cand->index, multiple);
+ stride = base_cand->stride;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = Y + (c * 1). */
+ kind = CAND_ADD;
+ base = base_in;
+ index = index_in;
+ stride = integer_one_node;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Given GS which is an add or subtract of scalar integers or pointers,
+ make at least one appropriate entry in the candidate table. GSI
+ locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_add (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ tree rhs2 = gimple_assign_rhs2 (gs);
+ bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
+ slsr_cand_t c, c2;
+ void **slot;
+
+ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME)
+ {
+ /* First record an interpretation assuming RHS1 is the base
+ name and RHS2 is the stride. */
+ c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, gsi, speed);
+
+ /* Add the first interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+
+ /* If the two RHS operands are identical, or this is a subtract,
+ we're done. */
+ if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
+ return;
+
+ /* Otherwise, record another interpretation assuming RHS2 is the
+ base name and RHS1 is the stride. */
+ c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, gsi, speed);
+ c->next_interp = c2->cand_num;
+ }
+ else
+ {
+ double_int index;
+
+ /* Canonicalize the add-immediate so the SSA name is RHS1. Only
+ valid for PLUS, since subtraction is not commutative. */
+ if (!subtract_p && TREE_CODE (rhs1) == INTEGER_CST)
+ {
+ tree swapper = rhs1;
+ rhs1 = rhs2;
+ rhs2 = swapper;
+ }
+
+ if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST)
+ return;
+
+ /* Record an interpretation for the add-immediate; a subtract of
+ a constant is treated as an add of its negation. */
+ index = tree_to_double_int (rhs2);
+ if (subtract_p)
+ index = double_int_neg (index);
+
+ c = create_add_imm_cand (gs, rhs1, index, gsi, speed);
+
+ /* Add the interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+ }
+}
+
+/* Given GS which is a negate of a scalar integer, make an appropriate
+ entry in the candidate table. A negate is equivalent to a multiply
+ by -1. GSI locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_neg (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ /* Declarations precede all statements, per GNU/C90 convention and
+ consistent with the rest of this file. */
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ slsr_cand_t c;
+ void **slot;
+
+ /* Record a CAND_MULT interpretation for the multiply by -1. */
+ c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, gsi, speed);
+
+ /* Add the interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Return TRUE if GS is a statement that defines an SSA name from
+ a NOP_EXPR and is legal for us to combine an add and multiply
+ across. This is legitimate for casts from a signed type to
+ a signed or unsigned type of the same or larger size. It is not
+ legitimate to convert any unsigned type to a signed type, or
+ to an unsigned type of a different size.
+
+ The reasoning here is that signed integer overflow is undefined,
+ so any program that was expecting overflow that no longer occurs
+ is not correct. Unsigned integers, however, have wrap semantics,
+ and it is reasonable for programs to assume an overflowing add
+ will wrap. */
+
+static bool
+legal_cast_p (gimple gs)
+{
+ tree lhs, rhs, lhs_type, rhs_type;
+ unsigned lhs_size, rhs_size, lhs_uns, rhs_uns;
+
+ if (!is_gimple_assign (gs)
+ || TREE_CODE (gimple_assign_lhs (gs)) != SSA_NAME
+ || gimple_assign_rhs_code (gs) != NOP_EXPR)
+ return false;
+
+ lhs = gimple_assign_lhs (gs);
+ rhs = gimple_assign_rhs1 (gs);
+
+ if (TREE_CODE (rhs) != SSA_NAME)
+ return false;
+
+ lhs_type = TREE_TYPE (SSA_NAME_VAR (lhs));
+ rhs_type = TREE_TYPE (SSA_NAME_VAR (rhs));
+ lhs_size = TYPE_PRECISION (lhs_type);
+ rhs_size = TYPE_PRECISION (rhs_type);
+ lhs_uns = TYPE_UNSIGNED (lhs_type);
+ rhs_uns = TYPE_UNSIGNED (rhs_type);
+
+ /* Narrowing casts discard bits; never legal. */
+ if (lhs_size < rhs_size)
+ return false;
+
+ /* Unsigned -> signed changes wrap semantics; not legal. */
+ if (rhs_uns && !lhs_uns)
+ return false;
+
+ /* Unsigned -> unsigned of a different (wider) size changes the
+ wrap point; not legal. */
+ if (rhs_uns && lhs_uns && rhs_size != lhs_size)
+ return false;
+
+ return true;
+}
+
+/* Given GS which is a cast to a scalar integer type, determine whether
+ the cast is legal for strength reduction. If so, make at least one
+ appropriate entry in the candidate table, propagating each
+ interpretation of the RHS's candidate (if any) with the cast's type.
+ GSI locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_cast (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ tree lhs, rhs1, ctype;
+ slsr_cand_t base_cand, c, c2;
+ unsigned savings = 0;
+ void **slot;
+
+ if (!legal_cast_p (gs))
+ return;
+
+ lhs = gimple_assign_lhs (gs);
+ rhs1 = gimple_assign_rhs1 (gs);
+ base_cand = base_cand_from_table (rhs1);
+ ctype = TREE_TYPE (lhs);
+
+ if (base_cand)
+ {
+ while (base_cand)
+ {
+ /* Propagate all data from the base candidate except the type,
+ which comes from the cast, and the base candidate's cast,
+ which is no longer applicable. */
+ if (has_single_use (rhs1))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+
+ c = alloc_cand_and_find_basis (base_cand->kind, gs,
+ base_cand->base_name,
+ base_cand->index, base_cand->stride,
+ ctype, savings, gsi);
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+ }
+ else
+ {
+ /* If nothing is known about the RHS, create fresh CAND_ADD and
+ CAND_MULT interpretations:
+
+ X = Y + (0 * 1)
+ X = (Y + 0) * 1
+
+ The first of these is somewhat arbitrary, but the choice of
+ 1 for the stride simplifies the logic for propagating casts
+ into their uses. */
+ c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
+ integer_one_node, ctype, 0, gsi);
+ c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
+ integer_one_node, ctype, 0, gsi);
+ c->next_interp = c2->cand_num;
+ }
+
+ /* Add the first (or only) interpretation to the statement-candidate
+ mapping. C is the last-allocated candidate in the loop case; its
+ statement is GS either way. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Given GS which is a copy of a scalar integer type, make at least one
+ appropriate entry in the candidate table.
+
+ This interface is included for completeness, but is unnecessary
+ if this pass immediately follows a pass that performs copy
+ propagation, such as DOM. */
+
+static void
+slsr_process_copy (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ slsr_cand_t base_cand, c, c2;
+ void **slot;
+ unsigned savings = 0;
+ tree rhs1 = gimple_assign_rhs1 (gs);
+
+ /* Only SSA-to-SSA copies are interesting. */
+ if (TREE_CODE (rhs1) != SSA_NAME)
+ return;
+
+ base_cand = base_cand_from_table (rhs1);
+
+ if (base_cand)
+ {
+ /* Mirror each interpretation of the copied value onto GS. */
+ while (base_cand)
+ {
+ /* Propagate all data from the base candidate. */
+ if (has_single_use (rhs1))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+
+ c = alloc_cand_and_find_basis (base_cand->kind, gs,
+ base_cand->base_name,
+ base_cand->index, base_cand->stride,
+ base_cand->cand_type, savings, gsi);
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+ }
+ else
+ {
+ /* If nothing is known about the RHS, create fresh CAND_ADD and
+ CAND_MULT interpretations:
+
+ X = Y + (0 * 1)
+ X = (Y + 0) * 1
+
+ The first of these is somewhat arbitrary, but the choice of
+ 1 for the stride simplifies the logic for propagating casts
+ into their uses. */
+ c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
+ integer_one_node, TREE_TYPE (rhs1),
+ 0, gsi);
+ c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
+ integer_one_node, TREE_TYPE (rhs1),
+ 0, gsi);
+ c->next_interp = c2->cand_num;
+ }
+
+ /* Add the first (or only) interpretation to the statement-candidate
+ mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Find strength-reduction candidates in block BB. */
+
+static void
+find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
+ basic_block bb)
+{
+ bool speed = optimize_bb_for_speed_p (bb);
+ gimple_stmt_iterator gsi;
+
+ /* Examine each scalar-integer assignment and dispatch on the RHS
+ operation to build the candidate table. */
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple gs = gsi_stmt (gsi);
+ if (is_gimple_assign (gs)
+ && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
+ {
+ switch (gimple_assign_rhs_code (gs))
+ {
+ case MULT_EXPR:
+ slsr_process_mul (gsi, gs, speed);
+ break;
+
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ case MINUS_EXPR:
+ slsr_process_add (gsi, gs, speed);
+ break;
+
+ case NEGATE_EXPR:
+ slsr_process_neg (gsi, gs, speed);
+ break;
+
+ case NOP_EXPR:
+ slsr_process_cast (gsi, gs, speed);
+ break;
+
+ /* NOTE(review): for a plain copy, gimple_assign_rhs_code
+ returns the TREE_CODE of the RHS (e.g. SSA_NAME), so it
+ is unclear that MODIFY_EXPR can ever match here — confirm
+ whether slsr_process_copy is reachable. */
+ case MODIFY_EXPR:
+ slsr_process_copy (gsi, gs, speed);
+ break;
+
+ default:
+ ;
+ }
+ }
+ }
+}
+
+/* Dump a candidate for debug. */
+
+static void
+dump_candidate (slsr_cand_t c)
+{
+ fprintf (dump_file, "%3d [%d] ", c->cand_num,
+ gimple_bb (c->cand_stmt)->index);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ /* Show the interpretation in the canonical form for its kind:
+ CAND_MULT is (B + i) * S, CAND_ADD is B + (i * S). */
+ switch (c->kind)
+ {
+ case CAND_MULT:
+ fputs (" MULT : (", dump_file);
+ print_generic_expr (dump_file, c->base_name, 0);
+ fputs (" + ", dump_file);
+ dump_double_int (dump_file, c->index, false);
+ fputs (") * ", dump_file);
+ print_generic_expr (dump_file, c->stride, 0);
+ fputs (" : ", dump_file);
+ break;
+ case CAND_ADD:
+ fputs (" ADD : ", dump_file);
+ print_generic_expr (dump_file, c->base_name, 0);
+ fputs (" + (", dump_file);
+ dump_double_int (dump_file, c->index, false);
+ fputs (" * ", dump_file);
+ print_generic_expr (dump_file, c->stride, 0);
+ fputs (") : ", dump_file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ print_generic_expr (dump_file, c->cand_type, 0);
+ fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
+ c->basis, c->dependent, c->sibling);
+ fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
+ c->next_interp, c->dead_savings);
+ if (c->def_phi)
+ {
+ fputs (" phi: ", dump_file);
+ print_gimple_stmt (dump_file, c->def_phi, 0, 0);
+ }
+ fputs ("\n", dump_file);
+}
+
+/* Dump the candidate vector for debug. Callers are expected to have
+ checked dump_file already. */
+
+static void
+dump_cand_vec (void)
+{
+ unsigned i;
+ slsr_cand_t c;
+
+ fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
+
+ FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
+ dump_candidate (c);
+}
+
+/* Callback used to dump the candidate chains hash table. Prints one
+ base name followed by the candidate numbers chained from it.
+ Returns 1 so htab_traverse continues over all slots. */
+
+static int
+base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
+{
+ const_cand_chain_t chain = *((const_cand_chain_t *) slot);
+ cand_chain_t p;
+
+ print_generic_expr (dump_file, chain->base_name, 0);
+ fprintf (dump_file, " -> %d", chain->cand->cand_num);
+
+ for (p = chain->next; p; p = p->next)
+ fprintf (dump_file, " -> %d", p->cand->cand_num);
+
+ fputs ("\n", dump_file);
+ return 1;
+}
+
+/* Dump the candidate chains hash table. Callers are expected to have
+ checked dump_file already. */
+
+static void
+dump_cand_chains (void)
+{
+ fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
+ htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
+ fputs ("\n", dump_file);
+}
+
+/* Dump the increment vector for debug. Unlike the other dump helpers,
+ this one guards on dump_file itself. */
+
+static void
+dump_incr_vec (void)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ unsigned i;
+
+ fprintf (dump_file, "\nIncrement vector:\n\n");
+
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ fprintf (dump_file, "%3d increment: ", i);
+ dump_double_int (dump_file, incr_vec[i].incr, false);
+ fprintf (dump_file, "\n count: %d", incr_vec[i].count);
+ fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
+ fputs ("\n initializer: ", dump_file);
+ print_generic_expr (dump_file, incr_vec[i].initializer, 0);
+ fputs ("\n\n", dump_file);
+ }
+ }
+}
+
+/* Recursive helper for unconditional_cands_with_known_stride_p.
+ Returns TRUE iff C, its siblings, and its dependents are all
+ unconditional candidates (i.e. none is defined by a phi). */
+
+static bool
+unconditional_cands (slsr_cand_t c)
+{
+ /* A phi-dependence makes the candidate conditional. */
+ if (c->def_phi)
+ return false;
+
+ if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
+ return false;
+
+ if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
+ return false;
+
+ return true;
+}
+
+/* Determine whether or not the tree of candidates rooted at
+ ROOT consists entirely of unconditional increments with
+ an INTEGER_CST stride. */
+
+static bool
+unconditional_cands_with_known_stride_p (slsr_cand_t root)
+{
+ /* The stride is identical for all related candidates, so
+ check it once. */
+ if (TREE_CODE (root->stride) != INTEGER_CST)
+ return false;
+
+ /* The root itself is never replaced; only check its dependents. */
+ return unconditional_cands (lookup_cand (root->dependent));
+}
+
+/* Determine whether or not the tree of candidates rooted at
+ ROOT consists entirely of unconditional increments with
+ an SSA_NAME stride. */
+
+static bool
+unconditional_cands_with_unknown_stride_p (slsr_cand_t root)
+{
+ /* The stride is identical for all related candidates, so
+ check it once. */
+ if (TREE_CODE (root->stride) != SSA_NAME)
+ return false;
+
+ /* The root itself is never replaced; only check its dependents. */
+ return unconditional_cands (lookup_cand (root->dependent));
+}
+
+/* Calculate the increment required for candidate C relative to
+ its basis, i.e. C's index minus the basis's index. */
+
+static double_int
+cand_increment (slsr_cand_t c)
+{
+ slsr_cand_t basis;
+
+ /* If the candidate doesn't have a basis, just return its own
+ index. This is useful in record_increments to help us find
+ an existing initializer. */
+ if (!c->basis)
+ return c->index;
+
+ basis = lookup_cand (c->basis);
+ /* A candidate and its basis always share a base name. */
+ gcc_assert (operand_equal_p (c->base_name, basis->base_name, 0));
+ return double_int_sub (c->index, basis->index);
+}
+
+/* Replace candidate C, each sibling of candidate C, and each
+ dependent of candidate C with an add or subtract. Note that we
+ only operate on CAND_MULTs with known strides, so we will never
+ generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
+ replaced by X = Y + ((i - i') * S), as described in the module
+ commentary. The folded value ((i - i') * S) is referred to here
+ as the "bump." */
+
+static void
+replace_dependents (slsr_cand_t c)
+{
+ slsr_cand_t basis = lookup_cand (c->basis);
+ tree basis_name = gimple_assign_lhs (basis->cand_stmt);
+ tree incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
+ double_int stride = tree_to_double_int (c->stride);
+ double_int bump = double_int_mul (cand_increment (c), stride);
+ enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
+
+ /* It is highly unlikely, but possible, that the resulting
+ bump doesn't fit in a HWI. Abandon the replacement
+ in this case. This does not affect siblings or dependents
+ of C. Restriction to signed HWI is conservative for unsigned
+ types but allows for safe negation without twisted logic. */
+ if (double_int_fits_in_shwi_p (bump)
+ /* It is not useful to replace casts, copies, or adds of
+ an SSA name and a constant. */
+ && cand_code != MODIFY_EXPR
+ && cand_code != NOP_EXPR
+ && c->kind != CAND_ADD)
+ {
+ enum tree_code code = PLUS_EXPR;
+ tree bump_tree;
+ gimple stmt_to_print = NULL;
+
+ /* Canonicalize a negative bump into a subtract of its
+ absolute value. */
+ if (double_int_negative_p (bump))
+ {
+ code = MINUS_EXPR;
+ bump = double_int_neg (bump);
+ }
+
+ bump_tree = double_int_to_tree (incr_type, bump);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Replacing: ", dump_file);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ }
+
+ /* A zero bump means C computes the same value as its basis;
+ replace the statement with a plain copy. */
+ if (double_int_zero_p (bump))
+ {
+ tree lhs = gimple_assign_lhs (c->cand_stmt);
+ gimple copy_stmt = gimple_build_assign (lhs, basis_name);
+ gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
+ gsi_replace (&c->cand_gsi, copy_stmt, false);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = copy_stmt;
+ }
+ else
+ {
+ /* Skip the rewrite when the statement already has the exact
+ form we would produce (either operand order). NEGATE_EXPR
+ is excluded because it has no rhs2 to compare. */
+ tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
+ tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
+ if (cand_code != NEGATE_EXPR
+ && ((operand_equal_p (rhs1, basis_name, 0)
+ && operand_equal_p (rhs2, bump_tree, 0))
+ || (operand_equal_p (rhs1, bump_tree, 0)
+ && operand_equal_p (rhs2, basis_name, 0))))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("(duplicate, not actually replacing)", dump_file);
+ stmt_to_print = c->cand_stmt;
+ }
+ }
+ else
+ {
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, code,
+ basis_name, bump_tree);
+ update_stmt (gsi_stmt (c->cand_gsi));
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = gsi_stmt (c->cand_gsi);
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("With: ", dump_file);
+ print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
+ fputs ("\n", dump_file);
+ }
+ }
+
+ /* Recurse over the whole tree of related candidates. */
+ if (c->sibling)
+ replace_dependents (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ replace_dependents (lookup_cand (c->dependent));
+}
+
+/* Count the number of candidates in the tree rooted at C, including
+ C itself. */
+
+static unsigned
+count_candidates (slsr_cand_t c)
+{
+ unsigned count = 1;
+
+ if (c->sibling)
+ count += count_candidates (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ count += count_candidates (lookup_cand (c->dependent));
+
+ return count;
+}
+
+/* Determine how many times each unique increment occurs in the set
+ of candidates rooted at C's parent, recording the data in the
+ increment vector. For each unique increment I, if an initializer
+ T_0 = stride * I is provided by a candidate that dominates all
+ candidates with the same increment, also record T_0 for subsequent
+ use. The caller sizes incr_vec so that one entry per candidate
+ always suffices. */
+
+static void
+record_increments (slsr_cand_t c)
+{
+ double_int increment = cand_increment (c);
+ bool found = false;
+ unsigned i;
+
+ /* Linear search for an existing entry with this increment. */
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ if (double_int_equal_p (incr_vec[i].incr, increment))
+ {
+ incr_vec[i].count++;
+ found = true;
+
+ /* If we previously recorded an initializer that doesn't
+ dominate this candidate, it's not going to be useful to
+ us after all. */
+ if (incr_vec[i].initializer
+ && !dominated_by_p (CDI_DOMINATORS,
+ gimple_bb (c->cand_stmt),
+ incr_vec[i].init_bb))
+ {
+ incr_vec[i].initializer = NULL_TREE;
+ incr_vec[i].init_bb = NULL;
+ }
+
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ /* The first time we see an increment, create the entry for it.
+ If this candidate doesn't have a basis, set the count to zero.
+ We're only processing it so it can possibly provide an
+ initializer for other candidates. */
+ incr_vec[incr_vec_len].incr = increment;
+ incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
+ incr_vec[incr_vec_len].cost = COST_INFINITE;
+
+ /* Optimistically record the first occurrence of this increment
+ as providing an initializer (if it does); we will revise this
+ opinion later if it doesn't dominate all other occurrences.
+ Exception: increments of -1, 0, 1 never need initializers. */
+ if (c->kind == CAND_ADD
+ && double_int_equal_p (c->index, increment)
+ && (double_int_scmp (increment, double_int_one) > 0
+ || double_int_scmp (increment, double_int_minus_one) < 0))
+ {
+ tree t0;
+ tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
+ tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
+ /* T0 is whichever operand is not the base name, i.e. the
+ candidate's stride*increment value. */
+ if (operand_equal_p (rhs1, c->base_name, 0))
+ t0 = rhs2;
+ else
+ t0 = rhs1;
+ /* NOTE(review): SSA_NAME_DEF_STMT is applied to T0 here,
+ which assumes T0 is an SSA name — confirm a constant
+ operand cannot reach this point. */
+ if (SSA_NAME_DEF_STMT (t0) && gimple_bb (SSA_NAME_DEF_STMT (t0)))
+ {
+ incr_vec[incr_vec_len].initializer = t0;
+ incr_vec[incr_vec_len++].init_bb
+ = gimple_bb (SSA_NAME_DEF_STMT (t0));
+ }
+ else
+ {
+ incr_vec[incr_vec_len].initializer = NULL_TREE;
+ incr_vec[incr_vec_len++].init_bb = NULL;
+ }
+ }
+ else
+ {
+ incr_vec[incr_vec_len].initializer = NULL_TREE;
+ incr_vec[incr_vec_len++].init_bb = NULL;
+ }
+ }
+
+ /* Recurse over the whole tree of related candidates. */
+ if (c->sibling)
+ record_increments (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ record_increments (lookup_cand (c->dependent));
+}
+
+/* Add COST_IN to the lowest cost of any dependent path starting at
+ candidate C or any of its siblings, counting only candidates along
+ such paths with increment INCR. Assume that replacing a candidate
+ reduces cost by REPL_SAVINGS. Also account for savings from any
+ statements that would go dead. A negative result means the
+ replacements along the best path are profitable. */
+
+static int
+lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
+ double_int incr)
+{
+ int local_cost, sib_cost;
+
+ /* Only candidates whose increment matches INCR would be replaced
+ and thus contribute savings. */
+ if (double_int_equal_p (incr, cand_increment (c)))
+ local_cost = cost_in - repl_savings - c->dead_savings;
+ else
+ local_cost = cost_in - c->dead_savings;
+
+ /* Dependents extend the current path; siblings start alternative
+ paths from the same incoming cost. */
+ if (c->dependent)
+ local_cost = lowest_cost_path (local_cost, repl_savings,
+ lookup_cand (c->dependent), incr);
+ if (c->sibling)
+ {
+ sib_cost = lowest_cost_path (cost_in, repl_savings,
+ lookup_cand (c->sibling), incr);
+ local_cost = MIN (local_cost, sib_cost);
+ }
+
+ return local_cost;
+}
+
+/* Compute the total savings that would accrue from all replacements
+ in the candidate tree rooted at C, counting only candidates with
+ increment INCR. Assume that replacing a candidate reduces cost
+ by REPL_SAVINGS. Also account for savings from statements that
+ would go dead. Used for the size (not speed) cost model. */
+
+static int
+total_savings (int repl_savings, slsr_cand_t c, double_int incr)
+{
+ int savings = 0;
+
+ if (double_int_equal_p (incr, cand_increment (c)))
+ savings += repl_savings + c->dead_savings;
+
+ if (c->dependent)
+ savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
+
+ if (c->sibling)
+ savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
+
+ return savings;
+}
+
+/* Use target-specific costs to determine and record which increments
+ in the current candidate tree are profitable to replace, assuming
+ MODE and SPEED. FIRST_DEP is the first dependent of the root of
+ the candidate tree.
+
+ One slight limitation here is that we don't account for the possible
+ introduction of casts in some cases. See replace_one_candidate for
+ the cases where these are introduced. This should probably be cleaned
+ up sometime. */
+
+static void
+analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
+{
+ unsigned i;
+
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ /* INCR may be truncated if the value doesn't fit a signed HWI,
+ but in that case the first branch below rejects the entry
+ before INCR is used. */
+ HOST_WIDE_INT incr = double_int_to_shwi (incr_vec[i].incr);
+
+ /* If somehow this increment is bigger than a HWI, we won't
+ be optimizing candidates that use it. And if the increment
+ has a count of zero, nothing will be done with it. */
+ if (!double_int_fits_in_shwi_p (incr_vec[i].incr)
+ || !incr_vec[i].count)
+ incr_vec[i].cost = COST_INFINITE;
+
+ /* Increments of 0, 1, and -1 are always profitable to replace,
+ because they always replace a multiply or add with an add or
+ copy, and may cause one or more existing instructions to go
+ dead. Exception: -1 can't be assumed to be profitable for
+ pointer addition. */
+ else if (incr == 0
+ || incr == 1
+ || (incr == -1
+ && (gimple_assign_rhs_code (first_dep->cand_stmt)
+ != POINTER_PLUS_EXPR)))
+ incr_vec[i].cost = COST_NEUTRAL;
+
+ /* For any other increment, if this is a multiply candidate, we
+ must introduce a temporary T and initialize it with
+ T_0 = stride * increment. When optimizing for speed, walk the
+ candidate tree to calculate the best cost reduction along any
+ path; if it offsets the fixed cost of inserting the initializer,
+ replacing the increment is profitable. When optimizing for
+ size, instead calculate the total cost reduction from replacing
+ all candidates with this increment. */
+ else if (first_dep->kind == CAND_MULT)
+ {
+ int cost = multiply_by_cost (incr, mode, speed);
+ /* Each replacement trades a multiply for an add. */
+ int repl_savings
+ = multiply_regs_cost (mode, speed) - add_regs_cost (mode, speed);
+
+ if (speed)
+ cost = lowest_cost_path (cost, repl_savings, first_dep,
+ incr_vec[i].incr);
+ else
+ cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);
+
+ incr_vec[i].cost = cost;
+ }
+
+ /* If this is an add candidate, the initializer may already
+ exist, so only calculate the cost of the initializer if it
+ doesn't. We are replacing one add with another here, so the
+ known replacement savings is zero. We will account for removal
+ of dead instructions in lowest_cost_path or total_savings. */
+ else
+ {
+ int cost = 0;
+ if (!incr_vec[i].initializer)
+ cost = multiply_by_cost (incr, mode, speed);
+
+ if (speed)
+ cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
+ else
+ cost -= total_savings (0, first_dep, incr_vec[i].incr);
+
+ incr_vec[i].cost = cost;
+ }
+ }
+}
+
+/* Return the nearest common dominator of BB1 and BB2. If the blocks
+ are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
+ if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
+ return C2 in *WHERE; and if the NCD matches neither, return NULL in
+ *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
+
+static basic_block
+nearest_common_dominator_for_two_cands (basic_block bb1, basic_block bb2,
+ slsr_cand_t c1, slsr_cand_t c2,
+ slsr_cand_t *where)
+{
+ basic_block ncd;
+
+ /* A NULL block means "no constraint" — the other side wins
+ outright. */
+ if (!bb1)
+ {
+ *where = c2;
+ return bb2;
+ }
+
+ if (!bb2)
+ {
+ *where = c1;
+ return bb1;
+ }
+
+ ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
+
+ /* If both candidates are in the same block, the earlier
+ candidate wins. Candidate numbers are assigned in statement
+ order, so the smaller number is earlier. */
+ if (bb1 == ncd && bb2 == ncd)
+ {
+ gcc_assert (c1 && c2);
+ if (c1->cand_num < c2->cand_num)
+ *where = c1;
+ else
+ *where = c2;
+ }
+
+ /* Otherwise, if one of them produced a candidate in the
+ dominator, that one wins. */
+ else if (bb1 == ncd)
+ *where = c1;
+
+ else if (bb2 == ncd)
+ *where = c2;
+
+ /* If neither matches the dominator, neither wins. */
+ else
+ *where = NULL;
+
+ return ncd;
+}
+
+/* Consider all candidates in the tree rooted at C for which INCR
+ represents the required increment of C relative to its basis.
+ Find and return the basic block that most nearly dominates all
+ such candidates. If the returned block contains one or more of
+ the candidates, return the earliest candidate in the block in
+ *WHERE. */
+
+static basic_block
+nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
+ slsr_cand_t *where)
+{
+ basic_block sib_ncd = NULL, dep_ncd = NULL, ncd;
+ slsr_cand_t sib_where = NULL, dep_where = NULL, new_where;
+ double_int cand_incr = cand_increment (c);
+
+ /* First find the NCD of all siblings and dependents. */
+ if (c->sibling)
+ sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
+ incr, &sib_where);
+ if (c->dependent)
+ dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
+ incr, &dep_where);
+ /* Combine the two sub-results; a NULL NCD means no candidate with
+ increment INCR exists in that subtree. */
+ if (!sib_ncd && !dep_ncd)
+ {
+ new_where = NULL;
+ ncd = NULL;
+ }
+ else if (sib_ncd && !dep_ncd)
+ {
+ new_where = sib_where;
+ ncd = sib_ncd;
+ }
+ else if (dep_ncd && !sib_ncd)
+ {
+ new_where = dep_where;
+ ncd = dep_ncd;
+ }
+ else
+ ncd = nearest_common_dominator_for_two_cands (sib_ncd, dep_ncd,
+ sib_where, dep_where,
+ &new_where);
+
+ /* If the candidate's increment doesn't match the one we're interested
+ in, then the result depends only on siblings and dependents. */
+ if (!double_int_equal_p (cand_incr, incr))
+ {
+ *where = new_where;
+ return ncd;
+ }
+
+ /* Otherwise, compare this candidate with the result from all siblings
+ and dependents. */
+ ncd = nearest_common_dominator_for_two_cands (ncd, gimple_bb (c->cand_stmt),
+ new_where, c, where);
+ return ncd;
+}
+
+/* Return TRUE if the increment indexed by INDEX is profitable to
+ replace, i.e. its analyzed cost is at most COST_NEUTRAL. */
+
+static inline bool
+profitable_increment_p (unsigned index)
+{
+ return (incr_vec[index].cost <= COST_NEUTRAL);
+}
+
+/* For each profitable increment in the increment vector not equal to
+ 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
+ dominator of all statements in the candidate chain rooted at C
+ that require that increment, and insert an initializer
+ T_0 = stride * increment at that location. Record T_0 with the
+ increment record. */
+
+static void
+insert_initializers (slsr_cand_t c)
+{
+ unsigned i;
+ tree new_var = NULL_TREE;
+
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ basic_block bb;
+ slsr_cand_t where = NULL;
+ gimple init_stmt;
+ tree stride_type, new_name, incr_tree;
+
+ /* Skip unprofitable increments and the special values -1, 0, 1,
+ which never need an initializer. */
+ if (!profitable_increment_p (i)
+ || double_int_one_p (incr_vec[i].incr)
+ || (double_int_minus_one_p (incr_vec[i].incr)
+ && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
+ || double_int_zero_p (incr_vec[i].incr))
+ continue;
+
+ /* We may have already identified an existing initializer that
+ will suffice. */
+ if (incr_vec[i].initializer)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Using existing initializer: ", dump_file);
+ print_gimple_stmt (dump_file,
+ SSA_NAME_DEF_STMT (incr_vec[i].initializer),
+ 0, 0);
+ }
+ continue;
+ }
+
+ /* Find the block that most closely dominates all candidates
+ with this increment. If there is at least one candidate in
+ that block, the earliest one will be returned in WHERE. */
+ bb = nearest_common_dominator_for_cands (c, incr_vec[i].incr, &where);
+
+ /* Create a new SSA name to hold the initializer's value. */
+ stride_type = TREE_TYPE (SSA_NAME_VAR (c->stride));
+
+ /* One temporary declaration is shared by all initializers
+ inserted for this chain. */
+ if (!new_var)
+ {
+ new_var = create_tmp_reg (stride_type, "slsr");
+ add_referenced_var (new_var);
+ }
+
+ new_name = make_ssa_name (new_var, NULL);
+ incr_vec[i].initializer = new_name;
+
+ /* Create the initializer and insert it in the latest possible
+ dominating position. */
+ incr_tree = double_int_to_tree (stride_type, incr_vec[i].incr);
+ init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
+ c->stride, incr_tree);
+ if (where)
+ {
+ gsi_insert_before (&where->cand_gsi, init_stmt, GSI_SAME_STMT);
+ gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
+ gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
+
+ /* Don't insert after a control statement terminating BB. */
+ if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
+ gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
+ else
+ gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
+
+ gimple_set_location (init_stmt, gimple_location (basis_stmt));
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Inserting initializer: ", dump_file);
+ print_gimple_stmt (dump_file, init_stmt, 0, 0);
+ }
+ }
+}
+
+/* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
+ type TO_TYPE, and insert it in front of the statement represented
+ by candidate C. Use *NEW_VAR to create the new SSA name; *NEW_VAR
+ is created lazily on first use and reused thereafter. Return
+ the new SSA name. */
+
+static tree
+introduce_cast_before_cand (slsr_cand_t c, tree to_type,
+ tree from_expr, tree *new_var)
+{
+ tree cast_lhs;
+ gimple cast_stmt;
+
+ if (!*new_var)
+ {
+ *new_var = create_tmp_reg (to_type, "slsr");
+ add_referenced_var (*new_var);
+ }
+
+ cast_lhs = make_ssa_name (*new_var, NULL);
+ cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
+ from_expr, NULL_TREE);
+ /* Attribute the new cast to the candidate's source location. */
+ gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
+ gsi_insert_before (&c->cand_gsi, cast_stmt, GSI_SAME_STMT);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs (" (Inserting: ", dump_file);
+ print_gimple_stmt (dump_file, cast_stmt, 0, 0);
+ fputs (")\n", dump_file);
+ }
+
+ return cast_lhs;
+}
+
+/* Replace the RHS of the statement represented by candidate C with
+ NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
+ leave C unchanged or just interchange its operands. The original
+ operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
+ If the replacement was made and we are doing a details dump,
+ return the revised statement, else NULL. */
+
+static gimple
+replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
+ enum tree_code old_code, tree old_rhs1, tree old_rhs2,
+ slsr_cand_t c)
+{
+ /* Rewrite only if the new RHS differs from the old one in either
+ operand order. */
+ if (new_code != old_code
+ || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
+ || !operand_equal_p (new_rhs2, old_rhs2, 0))
+ && (!operand_equal_p (new_rhs1, old_rhs2, 0)
+ || !operand_equal_p (new_rhs2, old_rhs1, 0))))
+ {
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, new_code,
+ new_rhs1, new_rhs2);
+ update_stmt (gsi_stmt (c->cand_gsi));
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ return gsi_stmt (c->cand_gsi);
+ }
+
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fputs (" (duplicate, not actually replacing)\n", dump_file);
+
+ return NULL;
+}
+
+/* Strength-reduce the statement represented by candidate C by replacing
+ it with an equivalent addition or subtraction. I is the index into
+ the increment vector identifying C's increment. NEW_VAR is used to
+ create a new SSA name if a cast needs to be introduced. */
+
+static void
+replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var)
+{
+ gimple stmt_to_print = NULL;
+ tree orig_rhs1, orig_rhs2, basis_name;
+ slsr_cand_t basis;
+ tree rhs2;
+ enum tree_code orig_code, repl_code;
+
+ orig_code = gimple_assign_rhs_code (c->cand_stmt);
+ orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
+ orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
+ basis = lookup_cand (c->basis);
+ basis_name = gimple_assign_lhs (basis->cand_stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Replacing: ", dump_file);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ stmt_to_print = c->cand_stmt;
+ }
+
+ /* Pointer arithmetic must remain POINTER_PLUS_EXPR; everything
+ else becomes an integer add. */
+ if (orig_code == POINTER_PLUS_EXPR)
+ repl_code = POINTER_PLUS_EXPR;
+ else
+ repl_code = PLUS_EXPR;
+
+ /* If the increment has an initializer T_0, replace the candidate
+ statement with an add of the basis name and the initializer. */
+ if (incr_vec[i].initializer)
+ {
+ tree init_type = TREE_TYPE (SSA_NAME_VAR (incr_vec[i].initializer));
+ tree orig_type = TREE_TYPE (SSA_NAME_VAR (orig_rhs2));
+
+ if (types_compatible_p (orig_type, init_type))
+ rhs2 = incr_vec[i].initializer;
+ else
+ rhs2 = introduce_cast_before_cand (c, orig_type,
+ incr_vec[i].initializer,
+ new_var);
+
+ stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
+ orig_code, orig_rhs1, orig_rhs2,
+ c);
+ }
+
+ /* Otherwise, the increment is one of -1, 0, and 1. Replace
+ with a subtract of the stride from the basis name, a copy
+ from the basis name, or an add of the stride to the basis
+ name, respectively. It may be necessary to introduce a
+ cast (or reuse an existing cast). */
+ else if (double_int_one_p (incr_vec[i].incr))
+ {
+ tree stride_type = TREE_TYPE (SSA_NAME_VAR (c->stride));
+ tree orig_type = TREE_TYPE (SSA_NAME_VAR (orig_rhs2));
+
+ if (types_compatible_p (orig_type, stride_type))
+ rhs2 = c->stride;
+ else
+ rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
+
+ stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
+ orig_code, orig_rhs1, orig_rhs2,
+ c);
+ }
+
+ else if (double_int_minus_one_p (incr_vec[i].incr))
+ {
+ tree stride_type = TREE_TYPE (SSA_NAME_VAR (c->stride));
+ tree orig_type = TREE_TYPE (SSA_NAME_VAR (orig_rhs2));
+ /* analyze_increments never marks -1 profitable for pointer
+ addition, so a POINTER_PLUS_EXPR cannot reach here. */
+ gcc_assert (repl_code != POINTER_PLUS_EXPR);
+
+ if (types_compatible_p (orig_type, stride_type))
+ rhs2 = c->stride;
+ else
+ rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
+
+ /* MINUS_EXPR is not commutative, so unlike replace_rhs_if_not_dup
+ only the exact operand order counts as a duplicate. */
+ if (repl_code != MINUS_EXPR
+ || !operand_equal_p (basis_name, orig_rhs1, 0)
+ || !operand_equal_p (rhs2, orig_rhs2, 0))
+ {
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, MINUS_EXPR,
+ basis_name, rhs2);
+ update_stmt (gsi_stmt (c->cand_gsi));
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = gsi_stmt (c->cand_gsi);
+ }
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fputs (" (duplicate, not actually replacing)\n", dump_file);
+ }
+
+ else if (double_int_zero_p (incr_vec[i].incr))
+ {
+ /* A zero increment means C equals its basis; emit a copy, or a
+ cast when the types differ. */
+ tree lhs = gimple_assign_lhs (c->cand_stmt);
+ tree lhs_type = TREE_TYPE (SSA_NAME_VAR (lhs));
+ tree basis_type = TREE_TYPE (SSA_NAME_VAR (basis_name));
+
+ if (types_compatible_p (lhs_type, basis_type))
+ {
+ gimple copy_stmt = gimple_build_assign (lhs, basis_name);
+ gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
+ gsi_replace (&c->cand_gsi, copy_stmt, false);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = copy_stmt;
+ }
+ else
+ {
+ gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
+ basis_name,
+ NULL_TREE);
+ gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
+ gsi_replace (&c->cand_gsi, cast_stmt, false);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = cast_stmt;
+ }
+ }
+ else
+ gcc_unreachable ();
+
+ if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
+ {
+ fputs ("With: ", dump_file);
+ print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
+ fputs ("\n", dump_file);
+ }
+}
+
+/* For each candidate in the tree rooted at C, replace it with
+ an increment if such has been shown to be profitable. */
+
+static void
+replace_profitable_candidates (slsr_cand_t c)
+{
+ double_int increment = cand_increment (c);
+ unsigned i;
+ tree new_var = NULL;
+ enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
+
+ /* Find C's increment in the increment vector. Every increment in
+ the tree was recorded by record_increments, so the search must
+ succeed. */
+ for (i = 0;
+ i < incr_vec_len && !double_int_equal_p (increment, incr_vec[i].incr);
+ i++)
+ ;
+
+ gcc_assert (i < incr_vec_len);
+
+ /* Only process profitable increments. Nothing useful can be done
+ to a cast or copy. */
+ if (profitable_increment_p (i)
+ && orig_code != MODIFY_EXPR
+ && orig_code != NOP_EXPR)
+ replace_one_candidate (c, i, &new_var);
+
+ if (c->sibling)
+ replace_profitable_candidates (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ replace_profitable_candidates (lookup_cand (c->dependent));
+}
+
+/* Analyze costs of related candidates in the candidate vector,
+   and make beneficial replacements.  This is the driver called
+   after all candidates have been collected by the dominator walk. */
+
+static void
+analyze_candidates_and_replace (void)
+{
+  unsigned i;
+  slsr_cand_t c;
+
+  /* Each candidate that has a null basis and a non-null
+     dependent is the root of a tree of related statements.
+     Analyze each tree to determine a subset of those
+     statements that can be replaced with maximum benefit. */
+  FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
+    {
+      slsr_cand_t first_dep;
+
+      /* Skip candidates that are not roots of a dependency tree.  */
+      if (c->basis != 0 || c->dependent == 0)
+	continue;
+
+      /* If the candidate statement no longer belongs to a basic block,
+	 another interpretation of this statement has caused the
+	 statement to be replaced.  Don't worry about potential basis
+	 chains for such already-handled statements. */
+      if (!gimple_bb (c->cand_stmt))
+	continue;
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
+		 c->cand_num);
+
+      first_dep = lookup_cand (c->dependent);
+
+      /* If the common stride of all related candidates is a
+	 known constant, and none of these has a phi-dependence,
+	 then all replacements are considered profitable.
+	 Each replaces a multiply by a single add, with the
+	 possibility that a feeding add also goes dead as a
+	 result. */
+      if (unconditional_cands_with_known_stride_p (c))
+	replace_dependents (first_dep);
+
+      /* When the stride is an SSA name, it may still be profitable
+	 to replace some or all of the dependent candidates, depending
+	 on whether the introduced increments can be reused, or are
+	 less expensive to calculate than the replaced statements. */
+      else if (unconditional_cands_with_unknown_stride_p (c))
+	{
+	  /* Construct an array of increments for this candidate chain. */
+	  unsigned length = count_candidates (c);
+	  /* Element size rounded up to a multiple of 16 bytes.  Note
+	     that incr_vec is indexed with the normal sizeof (incr_info)
+	     stride (see replace_profitable_candidates), so the rounding
+	     only over-allocates; presumably for alignment -- TODO
+	     confirm whether the padding is still needed.  */
+	  unsigned info_size = (sizeof (incr_info) + 15) & ~15;
+	  enum machine_mode mode;
+	  bool speed;
+
+	  incr_vec = XNEWVAR (incr_info, length * info_size);
+	  incr_vec_len = 0;
+	  record_increments (c);
+
+	  /* Determine which increments are profitable to replace.
+	     Mode and speed are taken from the root candidate's lhs
+	     and block. */
+	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
+	  speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
+	  analyze_increments (first_dep, mode, speed);
+
+	  /* Insert initializers of the form T_0 = stride * increment
+	     for use in profitable replacements. */
+	  insert_initializers (first_dep);
+	  dump_incr_vec ();
+
+	  /* Perform the replacements, then release the per-chain
+	     increment vector. */
+	  replace_profitable_candidates (first_dep);
+	  free (incr_vec);
+	}
+
+      /* TODO: When conditional increments occur so that a
+	 candidate is dependent upon a phi-basis, the cost of
+	 introducing a temporary must be accounted for. */
+
+      /* TODO: Strength reduction candidates hidden in
+	 addressing expressions are not yet handled, and will
+	 have more complex cost functions. */
+    }
+}
+
+/* Main entry point for the straight-line strength reduction pass.
+   Sets up the candidate pools, hash tables, and loop/dominator
+   information, walks the CFG in predominator order to collect
+   candidates, performs the profitable replacements, and tears
+   everything down again.  Always returns 0 (no extra TODO flags).  */
+
+static unsigned
+execute_strength_reduction (void)
+{
+  struct dom_walk_data walk_data;
+
+  /* Create the allocation pool where candidates will reside. */
+  cand_pool = create_alloc_pool ("Strength reduction candidates",
+				 sizeof (slsr_cand), 128);
+
+  /* Allocate the candidate vector. */
+  cand_vec = VEC_alloc (slsr_cand_t, heap, 128);
+
+  /* Allocate the mapping from statements to candidate indices. */
+  stmt_cand_map = htab_create (500, stmt_cand_hash,
+			       stmt_cand_eq, stmt_cand_free);
+
+  /* Create the allocation pool where candidate chains will reside. */
+  chain_pool = create_alloc_pool ("Strength reduction chains",
+				  sizeof (cand_chain), 128);
+
+  /* Allocate the mapping from base names to candidate chains. */
+  base_cand_map = htab_create (500, base_cand_hash,
+			       base_cand_eq, base_cand_free);
+
+  /* Initialize the loop optimizer.  We need to detect flow across
+     back edges, and this gives us dominator information as well. */
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+  /* Set up callbacks for the generic dominator tree walker. */
+  walk_data.dom_direction = CDI_DOMINATORS;
+  walk_data.initialize_block_local_data = NULL;
+  walk_data.before_dom_children = find_candidates_in_block;
+  walk_data.after_dom_children = NULL;
+  walk_data.global_data = NULL;
+  walk_data.block_local_data_size = 0;
+  init_walk_dominator_tree (&walk_data);
+
+  /* Walk the CFG in predominator order looking for strength reduction
+     candidates. */
+  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      dump_cand_vec ();
+      dump_cand_chains ();
+    }
+
+  /* Analyze costs and make appropriate replacements. */
+  analyze_candidates_and_replace ();
+
+  /* Free resources in the reverse order of their creation. */
+  fini_walk_dominator_tree (&walk_data);
+  loop_optimizer_finalize ();
+  htab_delete (base_cand_map);
+  free_alloc_pool (chain_pool);
+  htab_delete (stmt_cand_map);
+  VEC_free (slsr_cand_t, heap, cand_vec);
+  free_alloc_pool (cand_pool);
+
+  return 0;
+}
+
+/* Gate for the strength reduction pass: run whenever any
+   optimization is enabled (i.e. not at -O0).  */
+
+static bool
+gate_strength_reduction (void)
+{
+  return optimize > 0;
+}
+
+/* Pass descriptor for straight-line strength reduction, declared in
+   tree-pass.h and scheduled from passes.c (after DOM, before the
+   degenerate-PHI cleanup).  */
+
+struct gimple_opt_pass pass_strength_reduction =
+{
+ {
+  GIMPLE_PASS,
+  "slsr",				/* name */
+  gate_strength_reduction,		/* gate */
+  execute_strength_reduction,		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_TREE_SLSR,				/* tv_id */
+  PROP_cfg | PROP_ssa,			/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func | TODO_verify_ssa	/* todo_flags_finish */
+ }
+};
===================================================================
@@ -816,6 +816,11 @@ bool stmt_invariant_in_loop_p (struct loop *, gimp
bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode,
addr_space_t);
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool);
+unsigned add_regs_cost (enum machine_mode, bool);
+unsigned multiply_regs_cost (enum machine_mode, bool);
+unsigned add_const_cost (enum machine_mode, bool);
+unsigned extend_or_trunc_cost (tree, tree, bool);
+unsigned negate_cost (enum machine_mode, bool);
bool may_be_nonaddressable_p (tree expr);
/* In tree-ssa-threadupdate.c. */
===================================================================
@@ -1417,6 +1417,7 @@ OBJS = \
tree-ssa-reassoc.o \
tree-ssa-sccvn.o \
tree-ssa-sink.o \
+ tree-ssa-strength-reduction.o \
tree-ssa-strlen.o \
tree-ssa-structalias.o \
tree-ssa-tail-merge.o \
@@ -2422,6 +2423,10 @@ tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H)
alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) langhooks.h $(HASHTAB_H) $(GIMPLE_H) \
$(TREE_INLINE_H) tree-iterator.h tree-ssa-propagate.h tree-ssa-sccvn.h \
$(PARAMS_H) tree-pretty-print.h gimple-pretty-print.h gimple-fold.h
+tree-ssa-strength-reduction.o : tree-ssa-strength-reduction.c $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TREE_H) $(GIMPLE_H) $(BASIC_BLOCK_H) \
+ $(TREE_PASS_H) $(TIMEVAR_H) $(CFGLOOP_H) tree-pretty-print.h \
+ gimple-pretty-print.h alloc-pool.h $(TREE_FLOW_H) domwalk.h
tree-vrp.o : tree-vrp.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(GGC_H) \
$(BASIC_BLOCK_H) tree-ssa-propagate.h $(FLAGS_H) $(TREE_DUMP_H) \
===================================================================
@@ -1380,6 +1380,10 @@ init_optimization_passes (void)
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
NEXT_PASS (pass_dominator);
+ /* Strength reduction is more effective after constant and
+ copy propagation provided by DOM, but produces dead code
+ that needs to be cleaned up by CD_DCE. */
+ NEXT_PASS (pass_strength_reduction);
/* The only const/copy propagation opportunities left after
DOM should be due to degenerate PHI nodes. So rather than
run the full propagators, run a specialized pass which