===================================================================
@@ -455,6 +455,7 @@ extern struct gimple_opt_pass pass_tm_memopt;
extern struct gimple_opt_pass pass_tm_edges;
extern struct gimple_opt_pass pass_split_functions;
extern struct gimple_opt_pass pass_feedback_split_functions;
+extern struct gimple_opt_pass pass_strength_reduction;
/* IPA Passes */
extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
===================================================================
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern void foo (int);
+
+void
+f (int *p, unsigned int n)
+{
+ foo (*(p + n * 4));
+ foo (*(p + 32 + n * 4));
+ if (n > 3)
+ foo (*(p + 16 + n * 4));
+ else
+ foo (*(p + 48 + n * 4));
+}
+
+/* { dg-final { scan-tree-dump-times "\\+ 128" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 64" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 192" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern void foo (int);
+
+void
+f (int *p, int n)
+{
+ foo (*(p + n++ * 4));
+ foo (*(p + 32 + n++ * 4));
+ foo (*(p + 16 + n * 4));
+}
+
+/* { dg-final { scan-tree-dump-times "\\+ 144" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 96" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+foo (int a[], int b[], int i)
+{
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ return i;
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 4" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 8" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 12" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slsr -fdump-tree-optimized" } */
+
+void foo (int);
+
+int
+f (int i)
+{
+ int x, y;
+
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+
+ i = i + 5;
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+
+ i = i - 4;
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 10" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 20;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\- 16;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\- 160" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\* 10" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 40" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "slsr" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -253,6 +253,7 @@ DEFTIMEVAR (TV_TREE_IFCOMBINE , "tree if-co
DEFTIMEVAR (TV_TREE_UNINIT , "uninit var analysis")
DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization")
DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution")
+DEFTIMEVAR (TV_TREE_SLSR , "straight-line strength reduction")
/* Everything else in rest_of_compilation not included above. */
DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes")
===================================================================
@@ -0,0 +1,851 @@
+/* Straight-line strength reduction.
+ Copyright (C) 2011 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* There are many algorithms for performing strength reduction on
+ loops. This is not one of them. IVOPTS handles strength reduction
+ of induction variables just fine. This pass is intended to pick
+ up the crumbs it leaves behind, by considering opportunities for
+ strength reduction along dominator paths.
+
+ Strength reduction will be implemented in four stages, gradually
+ adding more complex candidates:
+
+ 1) Explicit multiplies, known constant multipliers, no
+ conditional increments. (complete)
+ 2) Explicit multiplies, unknown constant multipliers,
+ no conditional increments.
+ 3) Explicit multiplies, conditional increments.
+ 4) Implicit multiplies in addressing expressions.
+
+ It would also be possible to apply strength reduction to divisions
+ and modulos, but such opportunities are relatively uncommon.
+
+ Strength reduction is also currently restricted to integer operations.
+ If desired, it could be extended to floating-point operations under
+ control of something like -funsafe-math-optimizations. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "gimple.h"
+#include "basic-block.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "alloc-pool.h"
+#include "tree-flow.h"
+
+/* Index into the candidate vector, offset by 1. VECs are zero-based,
+ while cand_idx's are one-based, with zero indicating null. */
+
+typedef unsigned cand_idx;
+
+/* Information about a strength reduction candidate. There will be two
+ forms of candidates that we will handle, named for the last instruction
+ in the sequence that we hope to replace. In stage 1, only multiply
+ candidates are handled.
+
+ A "multiply" candidate is a statement S1 of the expanded form
+
+ S1: LHS = (B + c1) * M,
+
+ where B is an SSA name, c1 is a constant that may be zero, and
+ M is either an SSA name or a nonzero constant. A multiply
+ candidate may only be strength reduced if a previous "basis"
+ statement
+
+ S0: S = (B' + c0) * M
+
+ of the same form exists previously in the same block or in a
+ dominator. Assuming replacement is profitable, there are two
+ cases:
+
+ (1) If B = B', then S1 can be replaced with:
+
+ S1': LHS = S + (c1 - c0) * M,
+
+ where (c1 - c0) * M is folded to the extent possible.
+
+ (2) If B differs from B', then we require that B = B' + c0,
+ and S1 can be replaced with:
+
+ S1': LHS = S + c1 * M,
+
+ where c1 * M is folded if possible.
+
+ Candidate records are allocated from an allocation pool. They are
+ addressed both from a hash table keyed on S1, and from a vector of
+ candidate pointers arranged in predominator order. */
+
+typedef struct slsr_cand_d
+{
+ /* The candidate statement S1. */
+ gimple cand_stmt;
+
+ /* The base SSA name B. */
+ tree base_name;
+
+ /* The stride M. */
+ tree stride;
+
+ /* The index constant c1. */
+ double_int index;
+
+ /* Index of this candidate in the candidate vector. */
+ cand_idx cand_num;
+
+ /* Index of the basis statement S0, if any, in the candidate vector. */
+ cand_idx basis;
+
+ /* First candidate for which this candidate is a basis, if one exists. */
+ cand_idx dependent;
+
+ /* Next candidate having the same basis as this one. */
+ cand_idx sibling;
+
+ /* If this is a conditional candidate, the defining PHI statement
+ for the base SSA name B. */
+ gimple def_phi;
+
+ /* Access to the statement for subsequent modification. Cached to
+ save compile time. */
+ gimple_stmt_iterator cand_gsi;
+
+} slsr_cand, *slsr_cand_t;
+
+typedef const struct slsr_cand_d *const_slsr_cand_t;
+
+/* Candidates are maintained in a vector. If candidate X dominates
+ candidate Y, then X appears before Y in the vector; but the
+ converse does not necessarily hold. */
+DEF_VEC_P (slsr_cand_t);
+DEF_VEC_ALLOC_P (slsr_cand_t, heap);
+static VEC (slsr_cand_t, heap) *cand_vec;
+
+/* Hash table embodying a mapping from statements to candidates. */
+static htab_t stmt_cand_map;
+
+/* Allocation pool for candidates. */
+static alloc_pool cand_pool;
+
+
+/* Produce a pointer to the IDX'th candidate in the candidate vector. */
+
+static slsr_cand_t
+lookup_cand (cand_idx idx)
+{
+ return VEC_index (slsr_cand_t, cand_vec, idx - 1);
+}
+
+/* Callback to produce a hash value for a candidate. */
+
+static hashval_t
+stmt_cand_hash (const void *p)
+{
+ return htab_hash_pointer (((const_slsr_cand_t)p)->cand_stmt);
+}
+
+/* Callback when an element is removed from the hash table.
+ We never remove entries until the entire table is released. */
+
+static void
+stmt_cand_free (void *p ATTRIBUTE_UNUSED)
+{
+}
+
+/* Callback to return true if two candidates are equal. */
+
+static int
+stmt_cand_eq (const void *p1, const void *p2)
+{
+ const_slsr_cand_t const cand1 = (const_slsr_cand_t)p1;
+ const_slsr_cand_t const cand2 = (const_slsr_cand_t)p2;
+ return (cand1->cand_stmt == cand2->cand_stmt);
+}
+
+/* Return TRUE if GS is a statement that defines an SSA name from
+ a NOP_EXPR and is legal for us to combine an add and multiply
+ across. This is legitimate for casts from a signed type to
+ a signed or unsigned type of the same or larger size. It is not
+ legitimate to convert any unsigned type to a signed type, or
+ to an unsigned type of a different size.
+
+ The reasoning here is that signed integer overflow is undefined,
+ so any program that was expecting overflow that no longer occurs
+ is not correct. Unsigned integers, however, have wrap semantics,
+ and it is reasonable for programs to assume an overflowing add
+ will wrap. */
+
+static bool
+legal_cast_p (gimple gs)
+{
+ tree lhs, rhs;
+ unsigned src_size, dst_size, src_uns, dst_uns;
+
+ if (!is_gimple_assign (gs)
+ || TREE_CODE (gimple_assign_lhs (gs)) != SSA_NAME
+ || gimple_assign_rhs_code (gs) != NOP_EXPR)
+ return false;
+
+ lhs = gimple_assign_lhs (gs);
+ rhs = gimple_assign_rhs1 (gs);
+
+ if (TREE_CODE (rhs) != SSA_NAME)
+ return false;
+
+ src_size = TYPE_PRECISION (TREE_TYPE (SSA_NAME_VAR (rhs)));
+ src_uns = TYPE_UNSIGNED (TREE_TYPE (SSA_NAME_VAR (rhs)));
+ dst_size = TYPE_PRECISION (TREE_TYPE (SSA_NAME_VAR (lhs)));
+ dst_uns = TYPE_UNSIGNED (TREE_TYPE (SSA_NAME_VAR (lhs)));
+
+ if (dst_size < src_size)
+ return false;
+
+ if (src_uns && !dst_uns)
+ return false;
+
+ if (src_uns && dst_uns && src_size != dst_size)
+ return false;
+
+ return true;
+}
+
+/* If GS is a statement that defines an SSA name from a NOP_EXPR or
+ CONVERT_EXPR and is legal for our purposes (see legal_cast_p),
+ return the source SSA name. */
+
+static tree
+source_of_legal_cast (gimple gs)
+{
+ if (legal_cast_p (gs))
+ return gimple_assign_rhs1 (gs);
+
+ return NULL_TREE;
+}
+
+/* If GS is a statement that defines an SSA name from a NOP_EXPR or
+ CONVERT_EXPR and is legal for our purposes (see legal_cast_p),
+ return the target SSA name. */
+
+static tree
+target_of_legal_cast (gimple gs)
+{
+ if (legal_cast_p (gs))
+ return gimple_assign_lhs (gs);
+
+ return NULL_TREE;
+}
+
+/* Return TRUE if GS consists of an add or subtract of BASE_NAME
+ with a constant, with the result placed in an SSA name. GS is
+ known to be a gimple assignment. CODE is the RHS code for GS. */
+
+static bool
+stmt_adds_base_to_const (gimple gs, enum tree_code code, tree base_name)
+{
+ return (TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
+ && (code == PLUS_EXPR || code == MINUS_EXPR)
+ && operand_equal_p (gimple_assign_rhs1 (gs), base_name, 0)
+ && TREE_CODE (gimple_assign_rhs2 (gs)) == INTEGER_CST);
+}
+
+/* Recursive helper for find_basis_for_candidate. To find a
+ possible basis, first try the immediate uses of the given
+ BASE_NAME. If a particular use doesn't appear in a multiply,
+ see if it appears in an add that feeds one or more multiplies.
+ Recurse using the SSA name defined on the add. Each potential
+ basis found in this manner must also appear in a block that
+ dominates the candidate statement, be present in the candidate
+ table, and have the same stride M. If more than one possible
+ basis exists, pick the one with highest index in the vector,
+ which will be the most immediately dominating basis. */
+
+static slsr_cand_t
+find_basis_for_mul_candidate (slsr_cand_t c, tree base_name, slsr_cand_t basis)
+{
+ imm_use_iterator use_iter;
+ use_operand_p use_p;
+
+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, base_name)
+ {
+ gimple use_stmt = USE_STMT (use_p);
+ slsr_cand_t use_basis = NULL;
+ enum tree_code code;
+ tree cast_target;
+
+ if (!is_gimple_assign (use_stmt))
+ continue;
+
+ /* When revising a candidate's basis, make sure not to pick itself. */
+ if (c->cand_stmt == use_stmt)
+ continue;
+
+ /* Look through casts where legal. */
+ cast_target = target_of_legal_cast (use_stmt);
+ if (cast_target)
+ return find_basis_for_mul_candidate (c, cast_target, basis);
+
+ /* If the use statement is a multiply, it's a potential basis.
+ Otherwise, we have to look at the uses of the use statement
+ to find any multiplies that may be potential bases. */
+ code = gimple_assign_rhs_code (use_stmt);
+
+ if (code == MULT_EXPR)
+ {
+ slsr_cand mapping_key;
+ mapping_key.cand_stmt = use_stmt;
+ use_basis = (slsr_cand_t)htab_find (stmt_cand_map, &mapping_key);
+ }
+
+ /* Recurse if this is an add of a constant that might feed a
+ multiply. */
+ else if (stmt_adds_base_to_const (use_stmt, code, base_name))
+ {
+ tree lhs = gimple_assign_lhs (use_stmt);
+ use_basis = find_basis_for_mul_candidate (c, lhs, basis);
+ }
+
+ /* A potential basis must dominate the candidate. */
+ if (!use_basis
+ || !operand_equal_p (c->stride, use_basis->stride, 0)
+ || !dominated_by_p (CDI_DOMINATORS,
+ gimple_bb (c->cand_stmt),
+ gimple_bb (use_stmt)))
+ continue;
+
+ /* When revising a candidate's basis, be careful not to pick
+ a basis that occurs after the candidate. */
+ if (use_basis->cand_num > c->cand_num)
+ continue;
+
+ if (!basis || basis->cand_num < use_basis->cand_num)
+ basis = use_basis;
+ }
+
+ return basis;
+}
+
+/* Look for the nearest dominating statement in the candidate
+ vector that can serve as a basis for the new CANDIDATE. If
+ found, adjust the dependent links for the basis entry and
+ return the index of the basis entry. Otherwise return zero. */
+
+static unsigned int
+find_basis_for_candidate (slsr_cand_t c)
+{
+ slsr_cand_t basis = find_basis_for_mul_candidate (c, c->base_name, NULL);
+
+ if (basis)
+ {
+ c->sibling = basis->dependent;
+ basis->dependent = c->cand_num;
+ return basis->cand_num;
+ }
+
+ return 0;
+}
+
+/* Starting with statement GS, look backwards through any
+ intervening legal casts to find one or more
+ adds of an SSA name and a constant. Return the earliest
+ SSA name in the chain as *BASE, and the sum of all constants
+ in the chain as INDEX. */
+
+static void
+combine_feeding_adds (gimple gs, tree *base, double_int *index)
+{
+ double_int new_index;
+ tree cast_source;
+
+ while ((cast_source = source_of_legal_cast (gs)))
+ {
+ gs = SSA_NAME_DEF_STMT (cast_source);
+ *base = cast_source;
+ }
+
+ /* If there aren't any more adds, we're done. */
+ if (!is_gimple_assign (gs)
+ || (gimple_assign_rhs_code (gs) != PLUS_EXPR
+ && gimple_assign_rhs_code (gs) != MINUS_EXPR)
+ || TREE_CODE (gimple_assign_rhs1 (gs)) != SSA_NAME
+ || TREE_CODE (gimple_assign_rhs2 (gs)) != INTEGER_CST)
+ return;
+
+ *base = gimple_assign_rhs1 (gs);
+ new_index = tree_to_double_int (gimple_assign_rhs2 (gs));
+
+ if (gimple_assign_rhs_code (gs) == MINUS_EXPR)
+ new_index = double_int_neg (new_index);
+
+ *index = double_int_add (*index, new_index);
+ combine_feeding_adds (SSA_NAME_DEF_STMT (*base), base, index);
+}
+
+/* Allocate storage for a new candidate and initialize its fields.
+ Attempt to find a basis for the candidate. */
+
+static slsr_cand_t
+alloc_cand_and_find_basis (gimple gs, tree base, tree stride,
+ double_int index, gimple_stmt_iterator gsi)
+{
+ slsr_cand_t c = (slsr_cand_t)pool_alloc (cand_pool);
+ c->cand_stmt = gs;
+ c->base_name = base;
+ c->stride = stride;
+ c->index = index;
+ c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1;
+ c->dependent = 0;
+ c->sibling = 0;
+ c->def_phi = NULL;
+ c->cand_gsi = gsi;
+ c->basis = find_basis_for_candidate (c);
+ return c;
+}
+
+/* Given statement GS containing an integer multiply, determine
+ whether it's a possible strength-reduction candidate. If so,
+ add it to the candidate vector and the statement-candidate
+ mapping. */
+
+static void
+process_possible_mul_candidate (gimple_stmt_iterator gsi, gimple gs)
+{
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ tree rhs2 = gimple_assign_rhs2 (gs);
+ tree base;
+ double_int index;
+ slsr_cand_t c;
+ void **slot;
+
+ /* TODO: Unknown constant multipliers will be dealt with in
+ stage 2. */
+ if (TREE_CODE (rhs2) != INTEGER_CST)
+ return;
+
+ /* If RHS1 isn't fed by an addition or subtraction of an SSA_NAME
+ and a constant, then treat it as an add of zero. Look through
+ legal casts and combine multiple chained adds to find
+ the true base name and index. */
+ gcc_assert (TREE_CODE (rhs1) == SSA_NAME);
+ base = rhs1;
+ index = double_int_zero;
+ combine_feeding_adds (SSA_NAME_DEF_STMT (rhs1), &base, &index);
+
+ /* Allocate and initialize the candidate, and attempt to find
+ a basis for it. */
+ c = alloc_cand_and_find_basis (gs, base, rhs2, index, gsi);
+
+ VEC_safe_push (slsr_cand_t, heap, cand_vec, c);
+
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Find strength-reduction candidates in block BB. */
+
+static void
+find_candidates_in_block (basic_block bb)
+{
+ gimple_stmt_iterator gsi;
+ basic_block son;
+
+ /* Process this block. */
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple gs = gsi_stmt (gsi);
+ if (is_gimple_assign (gs)
+ && gimple_assign_rhs_code (gs) == MULT_EXPR
+ && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
+ process_possible_mul_candidate (gsi, gs);
+ }
+
+ /* Process dominated blocks. */
+ for (son = first_dom_son (CDI_DOMINATORS, bb);
+ son;
+ son = next_dom_son (CDI_DOMINATORS, son))
+ find_candidates_in_block (son);
+}
+
+/* Dump a candidate for debug. */
+
+static void
+dump_candidate (slsr_cand_t c)
+{
+ fprintf (dump_file, "%3d [%d] ", c->cand_num,
+ gimple_bb (c->cand_stmt)->index);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ fputs (" base: ", dump_file);
+ print_generic_expr (dump_file, c->base_name, 0);
+ fputs ("\n index: ", dump_file);
+ dump_double_int (dump_file, c->index, false);
+ fputs ("\n stride: ", dump_file);
+ print_generic_expr (dump_file, c->stride, 0);
+ fprintf (dump_file, "\n basis: %3d", c->basis);
+ fprintf (dump_file, "\n dependent: %3d", c->dependent);
+ fprintf (dump_file, "\n sibling: %3d", c->sibling);
+ fputs ("\n phi: ", dump_file);
+ if (c->def_phi)
+ print_gimple_stmt (dump_file, c->def_phi, 0, 0);
+ else
+ fputs ("n/a", dump_file);
+ fputs ("\n\n", dump_file);
+}
+
+/* Dump the candidate vector for debug. */
+
+static void
+dump_cand_vec (void)
+{
+ unsigned i;
+ slsr_cand_t c;
+
+ fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
+
+ FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
+ dump_candidate (c);
+}
+
+/* Recursive helper for unconditional_cands_with_known_stride_p.
+ Returns TRUE iff C, its siblings, and its dependents are all
+ unconditional candidates. */
+
+static bool
+unconditional_cands (slsr_cand_t c)
+{
+ if (c->def_phi)
+ return false;
+
+ if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
+ return false;
+
+ if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
+ return false;
+
+ return true;
+}
+
+/* Determine whether or not the tree of candidates rooted at
+ ROOT consists entirely of unconditional increments with
+ an INTEGER_CST stride. */
+
+static bool
+unconditional_cands_with_known_stride_p (slsr_cand_t root)
+{
+ /* The stride is identical for all related candidates, so
+ check it once. */
+ if (TREE_CODE (root->stride) != INTEGER_CST)
+ return false;
+
+ return unconditional_cands (lookup_cand (root->dependent));
+}
+
+/* GS is an add or subtract that used to be a multiply. Follow
+ its immediate uses to update the candidate table accordingly. */
+
+static void
+update_chained_candidates (gimple gs)
+{
+ tree base_name = gimple_assign_lhs (gs);
+ imm_use_iterator use_iter;
+ use_operand_p use_p;
+ tree cast_target;
+
+ FOR_EACH_IMM_USE_FAST (use_p, use_iter, base_name)
+ {
+ gimple use_stmt = USE_STMT (use_p);
+ enum tree_code code;
+ slsr_cand_t c;
+
+ if (!is_gimple_assign (use_stmt))
+ continue;
+
+ /* Look forward through converts that don't change semantics. */
+ cast_target = target_of_legal_cast (use_stmt);
+ if (cast_target)
+ {
+ update_chained_candidates (use_stmt);
+ continue;
+ }
+
+ code = gimple_assign_rhs_code (use_stmt);
+
+ /* Case 1: The name defined by the add appears in a
+ multiply that exists in the candidate table. */
+ if (code == MULT_EXPR)
+ {
+ slsr_cand mapping_key;
+ tree base;
+ double_int index;
+
+ mapping_key.cand_stmt = use_stmt;
+ if (!(c = (slsr_cand_t)htab_find (stmt_cand_map, &mapping_key)))
+ continue;
+
+ base = base_name;
+ index = double_int_zero;
+ combine_feeding_adds (gs, &base, &index);
+ c->base_name = base;
+ c->index = index;
+
+ if (!c->basis)
+ c->basis = find_basis_for_candidate (c);
+ else
+ /* If all has worked correctly, the modified candidate has
+ the same basis as before. */
+ gcc_assert (lookup_cand (c->basis)
+ == find_basis_for_mul_candidate (c, base, NULL));
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("\nRevised candidate:\n", dump_file);
+ dump_candidate (c);
+ }
+ }
+
+ /* Case 2: The name defined by the add appears in another
+ add with a constant, which in turn feeds one or more multiplies
+ in the candidate table. Recurse to find the multiplies. */
+ else if (stmt_adds_base_to_const (use_stmt, code, base_name))
+ update_chained_candidates (use_stmt);
+ }
+}
+
+/* Calculate the increment required for candidate C relative to
+ its basis. */
+
+static double_int
+cand_increment (slsr_cand_t c)
+{
+ slsr_cand_t basis = lookup_cand (c->basis);
+
+ if (operand_equal_p (c->base_name, basis->base_name, 0))
+ return double_int_sub (c->index, basis->index);
+ else
+ return c->index;
+}
+
+/* Insert a NOP_EXPR converting NAME to TO_TYPE, prior to
+ the statement referenced by candidate C. */
+
+static tree
+convert_name (tree name, slsr_cand_t c, tree to_type)
+{
+ tree new_name = create_tmp_reg (to_type, "slsr");
+ gimple nop_stmt;
+ add_referenced_var (new_name);
+ new_name = make_ssa_name (new_name, NULL);
+ nop_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_name,
+ name, NULL_TREE);
+ gimple_set_location (nop_stmt, gimple_location (c->cand_stmt));
+ gsi_insert_before (&c->cand_gsi, nop_stmt, GSI_SAME_STMT);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Inserting: ", dump_file);
+ print_gimple_stmt (dump_file, nop_stmt, 0, 0);
+ }
+
+ return new_name;
+}
+
+/* Replace candidate C, each sibling of candidate C, and each
+ dependent of candidate C with an add or subtract. */
+
+static void
+replace_dependents (slsr_cand_t c)
+{
+ slsr_cand_t basis = lookup_cand (c->basis);
+ tree basis_name = gimple_assign_lhs (basis->cand_stmt);
+ tree incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
+ double_int stride = tree_to_double_int (c->stride);
+ double_int increment = double_int_mul (cand_increment (c), stride);
+
+ /* It is highly unlikely, but possible, that the resulting
+ increment doesn't fit in a HWI. Abandon the replacement
+ in this case. This does not affect siblings or dependents
+ of C. */
+ /* Restriction to signed HWI is conservative for unsigned types
+ but allows for safe negation without twisted logic. */
+ if (double_int_fits_in_shwi_p (increment))
+ {
+ enum tree_code code = PLUS_EXPR;
+ tree orig_type
+ = TREE_TYPE (SSA_NAME_VAR (gimple_assign_rhs1 (c->cand_stmt)));
+ tree repl_type = TREE_TYPE (SSA_NAME_VAR (basis_name));
+ tree incr_tree;
+
+ if (double_int_negative_p (increment))
+ {
+ code = MINUS_EXPR;
+ increment = double_int_neg (increment);
+ }
+
+ incr_tree = double_int_to_tree (incr_type, increment);
+
+ /* A widening cast may be necessary. */
+ if (orig_type != repl_type)
+ basis_name = convert_name (basis_name, c, orig_type);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Replacing: ", dump_file);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ }
+
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, code,
+ basis_name, incr_tree);
+ update_stmt (c->cand_stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("With: ", dump_file);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ fputs ("\n", dump_file);
+ }
+
+ /* The multiply we just converted to an add might well feed
+ into one or more other multiplies in the candidate table.
+ If so, we need to update those candidate entries. */
+ update_chained_candidates (c->cand_stmt);
+ }
+
+ if (c->sibling)
+ replace_dependents (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ replace_dependents (lookup_cand (c->dependent));
+}
+
+/* Analyze costs of related candidates in the candidate vector,
+ and make beneficial replacements. */
+
+static void
+analyze_candidates_and_replace (void)
+{
+ unsigned i;
+ slsr_cand_t c;
+
+ /* Each candidate that has a null basis and a non-null
+ dependent is the root of a tree of related statements.
+ Analyze each tree to determine a subset of those
+ statements that can be replaced with maximum benefit. */
+ FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
+ {
+ slsr_cand_t first_dep;
+
+ if (c->basis != 0 || c->dependent == 0)
+ continue;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
+ c->cand_num);
+
+ first_dep = lookup_cand (c->dependent);
+
+ /* If the common stride of all related candidates is a
+ known constant, and none of these has a phi-dependence,
+ then all replacements are considered profitable.
+ Each replaces a multiply by a single add, with the
+ possibility that a feeding add also goes dead as a
+ result. */
+ if (unconditional_cands_with_known_stride_p (c))
+ replace_dependents (first_dep);
+
+ /* TODO: When the stride is an SSA name, it may still
+ be profitable to replace some or all of the dependent
+ candidates, depending on whether the introduced increments
+ can be reused. */
+
+ /* TODO: When conditional increments occur so that a
+ candidate is dependent upon a phi-basis, the cost of
+ introducing a temporary must be accounted for. */
+
+ /* TODO: Strength reduction candidates hidden in
+ addressing expressions are not yet handled, and will
+ have more complex cost functions. */
+ }
+}
+
+/* Main entry point for straight-line strength reduction. */
+
+static unsigned
+execute_strength_reduction (void)
+{
+ /* Create the allocation pool where candidates will reside. */
+ cand_pool = create_alloc_pool ("Strength reduction pool",
+ sizeof (slsr_cand), 128);
+
+ /* Allocate the candidate vector. */
+ cand_vec = VEC_alloc (slsr_cand_t, heap, 128);
+
+ /* Allocate the mapping from statements to candidate indices. */
+ stmt_cand_map = htab_create (500, stmt_cand_hash,
+ stmt_cand_eq, stmt_cand_free);
+
+ /* Initialize the loop optimizer. We need to detect flow across
+ back edges, and this gives us dominator information as well. */
+ loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+ /* Walk the CFG in predominator order looking for strength reduction
+ candidates. */
+ find_candidates_in_block (ENTRY_BLOCK_PTR);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ dump_cand_vec ();
+
+ /* Analyze costs and make appropriate replacements. */
+ analyze_candidates_and_replace ();
+
+ /* Free resources. */
+ loop_optimizer_finalize ();
+ htab_delete (stmt_cand_map);
+ VEC_free (slsr_cand_t, heap, cand_vec);
+ free_alloc_pool (cand_pool);
+
+ return 0;
+}
+
+static bool
+gate_strength_reduction (void)
+{
+ return optimize > 0;
+}
+
+struct gimple_opt_pass pass_strength_reduction =
+{
+ {
+ GIMPLE_PASS,
+ "slsr", /* name */
+ gate_strength_reduction, /* gate */
+ execute_strength_reduction, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ TV_TREE_SLSR, /* tv_id */
+ PROP_cfg | PROP_ssa, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ TODO_dump_func | TODO_verify_ssa /* todo_flags_finish */
+ }
+};
===================================================================
@@ -1416,6 +1416,7 @@ OBJS = \
tree-ssa-reassoc.o \
tree-ssa-sccvn.o \
tree-ssa-sink.o \
+ tree-ssa-strength-reduction.o \
tree-ssa-strlen.o \
tree-ssa-structalias.o \
tree-ssa-tail-merge.o \
@@ -2421,6 +2422,10 @@ tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H)
alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) langhooks.h $(HASHTAB_H) $(GIMPLE_H) \
$(TREE_INLINE_H) tree-iterator.h tree-ssa-propagate.h tree-ssa-sccvn.h \
$(PARAMS_H) tree-pretty-print.h gimple-pretty-print.h gimple-fold.h
+tree-ssa-strength-reduction.o : tree-ssa-strength-reduction.c $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TREE_H) $(GIMPLE_H) $(BASIC_BLOCK_H) \
+ $(TREE_PASS_H) $(TIMEVAR_H) $(CFGLOOP_H) tree-pretty-print.h alloc-pool.h \
+ $(TREE_FLOW_H)
tree-vrp.o : tree-vrp.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(GGC_H) \
$(BASIC_BLOCK_H) tree-ssa-propagate.h $(FLAGS_H) $(TREE_DUMP_H) \
===================================================================
@@ -1371,6 +1371,7 @@ init_optimization_passes (void)
}
NEXT_PASS (pass_lower_vector_ssa);
NEXT_PASS (pass_cse_reciprocals);
+ NEXT_PASS (pass_strength_reduction);
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
NEXT_PASS (pass_dominator);