diff mbox

[1/6] Loop flattening on loop-SSA.

Message ID 1288306702-5543-2-git-send-email-sebpop@gmail.com
State New
Headers show

Commit Message

Sebastian Pop Oct. 28, 2010, 10:58 p.m. UTC
2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>

	* Makefile.in (OBJS-common): Add tree-loop-flattening.o.
	(tree-loop-flattening.o): New.
	* common.opt (ftree-loop-flatten): New.
	* dbgcnt.def (lflat): New.
	* params.def (PARAM_LFLAT_MAX_NB_BBS): New.
	* passes.c (init_optimization_passes): Add new passes
	pass_flatten_loops and pass_if_conversion after loop vectorization
	and before pass_slp_vectorize.
	* timevar.def (TV_TREE_LOOP_FLATTENING): New.
	* tree-loop-flattening.c: New.
	* tree-pass.h (pass_flatten_loops): Declared.

	* gcc.dg/tree-ssa/flat-loop-1.c: New.
	* gcc.dg/tree-ssa/flat-loop-2.c: New.
	* gcc.dg/tree-ssa/flat-loop-3.c: New.
	* gcc.dg/tree-ssa/flat-loop-4.c: New.
---
 gcc/ChangeLog                               |   14 +
 gcc/Makefile.in                             |    4 +
 gcc/common.opt                              |    4 +
 gcc/dbgcnt.def                              |    1 +
 gcc/params.def                              |    7 +
 gcc/passes.c                                |    2 +
 gcc/testsuite/ChangeLog                     |    7 +
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c |   28 ++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c |   39 ++
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c |   19 +
 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c |   23 +
 gcc/timevar.def                             |    1 +
 gcc/tree-loop-flattening.c                  |  625 +++++++++++++++++++++++++++
 gcc/tree-pass.h                             |    1 +
 14 files changed, 775 insertions(+), 0 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
 create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
 create mode 100644 gcc/tree-loop-flattening.c
diff mbox

Patch

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index beed454..a0148d2 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,17 @@ 
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
+	* Makefile.in (OBJS-common): Add tree-loop-flattening.o.
+	(tree-loop-flattening.o): New.
+	* common.opt (ftree-loop-flatten): New.
+	* dbgcnt.def (lflat): New.
+	* params.def (PARAM_LFLAT_MAX_NB_BBS): New.
+	* passes.c (init_optimization_passes): Add new passes
+	pass_flatten_loops and pass_if_conversion after loop vectorization
+	and before pass_slp_vectorize.
+	* timevar.def (TV_TREE_LOOP_FLATTENING): New.
+	* tree-loop-flattening.c: New.
+	* tree-pass.h (pass_flatten_loops): Declared.
+
 2010-10-20  Nathan Froyd  <froydnj@codesourcery.com>
 
 	* ifcvt.c (noce_emit_cmove): If both of the values are SUBREGs, try
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 898e962..55b67f4 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1368,6 +1368,7 @@  OBJS-common = \
 	tree-into-ssa.o \
 	tree-iterator.o \
 	tree-loop-distribution.o \
+	tree-loop-flattening.o \
 	tree-loop-linear.o \
 	tree-nested.o \
 	tree-nrv.o \
@@ -2773,6 +2774,9 @@  tree-loop-distribution.o: tree-loop-distribution.c $(CONFIG_H) $(SYSTEM_H) coret
    $(DIAGNOSTIC_H) $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
    $(TREE_PASS_H) $(TREE_DATA_REF_H) $(EXPR_H) \
    langhooks.h $(TREE_VECTORIZER_H)
+tree-loop-flattening.o: tree-loop-flattening.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+   $(TM_H) $(OPTABS_H) $(TREE_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) $(TREE_FLOW_H) \
+   $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) $(TREE_PASS_H) $(DBGCNT_H)
 tree-parloops.o: tree-parloops.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
    $(TREE_FLOW_H) $(TREE_H) $(CFGLOOP_H) $(TREE_DATA_REF_H) \
    $(DIAGNOSTIC_H) $(TREE_PASS_H) langhooks.h gt-tree-parloops.h \
diff --git a/gcc/common.opt b/gcc/common.opt
index 8fe796f..c969979 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1632,6 +1632,10 @@  ftree-loop-distribute-patterns
 Common Report Var(flag_tree_loop_distribute_patterns) Optimization
 Enable loop distribution for patterns transformed into a library call
 
+ftree-loop-flatten
+Common Report Var(flag_tree_loop_flattening) Optimization
+Enable loop flattening on trees
+
 ftree-loop-im
 Common Report Var(flag_tree_loop_im) Init(1) Optimization
 Enable loop invariant motion on trees
diff --git a/gcc/dbgcnt.def b/gcc/dbgcnt.def
index 0492d66..0ef9a72 100644
--- a/gcc/dbgcnt.def
+++ b/gcc/dbgcnt.def
@@ -166,6 +166,7 @@  DEBUG_COUNTER (if_conversion_tree)
 DEBUG_COUNTER (if_after_combine)
 DEBUG_COUNTER (if_after_reload)
 DEBUG_COUNTER (local_alloc_for_sched)
+DEBUG_COUNTER (lflat)
 DEBUG_COUNTER (postreload_cse)
 DEBUG_COUNTER (pre)
 DEBUG_COUNTER (pre_insn)
diff --git a/gcc/params.def b/gcc/params.def
index 49a6185..3fffc35 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -788,6 +788,13 @@  DEFPARAM (PARAM_GRAPHITE_MAX_BBS_PER_FUNCTION,
 	  "maximum number of basic blocks per function to be analyzed by Graphite",
 	  100, 0, 0)
 
+/* Maximal number of basic blocks in a loop to be flattened.  */
+
+DEFPARAM (PARAM_LFLAT_MAX_NB_BBS,
+	  "lflat-max-nb-bbs",
+	  "maximum number of basic blocks in a loop to be flattened",
+	  100, 0, 0)
+
 /* Avoid doing loop invariant motion on very large loops.  */
 
 DEFPARAM (PARAM_LOOP_INVARIANT_MAX_BBS_IN_LOOP,
diff --git a/gcc/passes.c b/gcc/passes.c
index 1308ce9..4b778bc 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -913,6 +913,8 @@  init_optimization_passes (void)
 	    }
           NEXT_PASS (pass_predcom);
 	  NEXT_PASS (pass_complete_unroll);
+	  NEXT_PASS (pass_flatten_loops);
+	  NEXT_PASS (pass_if_conversion);
 	  NEXT_PASS (pass_slp_vectorize);
 	  NEXT_PASS (pass_parallelize_loops);
 	  NEXT_PASS (pass_loop_prefetch);
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 9d9c543..8ab520e 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,10 @@ 
+2010-10-20  Sebastian Pop  <sebastian.pop@amd.com>
+
+	* gcc.dg/tree-ssa/flat-loop-1.c: New.
+	* gcc.dg/tree-ssa/flat-loop-2.c: New.
+	* gcc.dg/tree-ssa/flat-loop-3.c: New.
+	* gcc.dg/tree-ssa/flat-loop-4.c: New.
+
 2010-10-20  Rainer Orth  <ro@CeBiTec.Uni-Bielefeld.DE>
 
 	PR c++/46024
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
new file mode 100644
index 0000000..bee8a2b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-1.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+struct stack_segment
+{
+  struct dynamic_allocation_blocks *dynamic_allocation;
+};
+struct dynamic_allocation_blocks
+{
+  struct dynamic_allocation_blocks *next;
+};
+static struct dynamic_allocation_blocks *
+merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
+		      struct dynamic_allocation_blocks *b)
+{
+  struct dynamic_allocation_blocks **pp;
+  for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
+    *pp = b;
+  return a;
+}
+__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
+{
+  struct dynamic_allocation_blocks *ret;
+  struct stack_segment *pss;
+  pss = *pp;
+  while (pss != ((void *)0))
+    ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
new file mode 100644
index 0000000..a7287fb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-2.c
@@ -0,0 +1,39 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+struct stack_segment
+{
+  struct stack_segment *next;
+  struct dynamic_allocation_blocks *dynamic_allocation;
+};
+struct dynamic_allocation_blocks
+{
+  struct dynamic_allocation_blocks *next;
+};
+static struct dynamic_allocation_blocks *
+merge_dynamic_blocks (struct dynamic_allocation_blocks *a,
+        struct dynamic_allocation_blocks *b)
+{
+  struct dynamic_allocation_blocks **pp;
+  if (b == ((void *)0))
+  for (pp = &a->next; *pp != ((void *)0); pp = &(*pp)->next)
+    ;
+  return a;
+}
+__morestack_release_segments (struct stack_segment **pp, int free_dynamic)
+{
+  struct dynamic_allocation_blocks *ret;
+  struct stack_segment *pss;
+  while (pss != ((void *)0))
+    {
+      struct stack_segment *next;
+      next = pss->next;
+ {
+   if (free_dynamic)
+     {
+       ret = merge_dynamic_blocks (pss->dynamic_allocation, ret);
+     }
+ }
+      pss = next;
+    }
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
new file mode 100644
index 0000000..d3d66ab
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-3.c
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+
+int
+split_directories (const char *name, int *ptr_num_dirs)
+{
+  int num_dirs = 0;
+  char **dirs;
+  const char *p, *q;
+  int ch;
+  while ((ch = *p++) != '\0')
+    {
+   num_dirs++;
+   while (((*p) == '/'))
+     p++;
+    }
+  return (dirs[num_dirs - 1] == ((void *)0));
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
new file mode 100644
index 0000000..8e551ac
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/flat-loop-4.c
@@ -0,0 +1,23 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-flatten" } */
+
+void
+formatted_backspace (int common, char *s)
+{
+  int base;
+  int n;
+  do
+    {
+      if (sseek (s, base, 0) < 0)
+	goto io_error;
+
+      while (n > 0)
+	{
+          n--;
+	  base += n + 1;
+	}
+    }
+  while (base != 0);
+ io_error:
+  generate_error (common, 0, ((void *)0));
+}
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 86e2999..89ff8e8 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -152,6 +152,7 @@  DEFTIMEVAR (TV_GRAPHITE_DATA_DEPS    , "Graphite data dep analysis")
 DEFTIMEVAR (TV_GRAPHITE_CODE_GEN     , "Graphite code generation")
 DEFTIMEVAR (TV_TREE_LINEAR_TRANSFORM , "tree loop linear")
 DEFTIMEVAR (TV_TREE_LOOP_DISTRIBUTION, "tree loop distribution")
+DEFTIMEVAR (TV_TREE_LOOP_FLATTENING  , "tree loop flattening")
 DEFTIMEVAR (TV_CHECK_DATA_DEPS       , "tree check data dependences")
 DEFTIMEVAR (TV_TREE_PREFETCH	     , "tree prefetching")
 DEFTIMEVAR (TV_TREE_LOOP_IVOPTS	     , "tree iv optimization")
diff --git a/gcc/tree-loop-flattening.c b/gcc/tree-loop-flattening.c
new file mode 100644
index 0000000..826e7e8
--- /dev/null
+++ b/gcc/tree-loop-flattening.c
@@ -0,0 +1,625 @@ 
+/* Loop flattening.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+   Contributed by Sebastian Pop <sebastian.pop@amd.com>.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "ggc.h"
+#include "tree.h"
+#include "rtl.h"
+#include "output.h"
+#include "basic-block.h"
+#include "diagnostic.h"
+#include "tree-flow.h"
+#include "toplev.h"
+#include "tree-dump.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-pass.h"
+#include "gimple.h"
+#include "params.h"
+#include "dbgcnt.h"
+
+/* This loop flattening pass transforms backward pointing edges into
+   forward pointing edges.
+
+   The back-edge removal transformation was described in the 1983
+   paper by Allen J. R., Ken Kennedy, Carrie Porterfield, and Joe
+   Warren: "Conversion of control dependence to data dependence"
+   available from http://doi.acm.org/10.1145/567067.567085
+
+   The back-edge removal algorithm was presented in that paper as part
+   of the if-conversion algorithm for backward pointing edges.  In
+   this section we will first provide a description of this technique
+   adapted for the Gimple-SSA form, followed by an example, and a
+   discussion of the differences with the higher level loop flattening
+   transformation.
+
+   The back-edge removal algorithm transforms control dependences into
+   data dependences by using a boolean variable.  The values taken by
+   the boolean variable control the execution path of the forward
+   edges created in order to use the back-edge of an outer loop.
+
+   The first step of the algorithm detects a surrounding loop and all
+   the back-edges of the loop body: these back-edges can be inner
+   loops or strongly connected components of the CFG that cannot be
+   reduced to natural loops.
+
+   Each back-edge is removed by redirecting the target of the
+   back-edge to the latch basic block of the surrounding loop.  A
+   boolean variable is created in the latch.  It is cleared when the
+   redirected back-edge is taken and it is set to true for any other
+   paths leading to the latch.
+
+   The header basic block of the surrounding loop is split before its
+   statements and a new condition is added based on the control
+   variable: when the control variable is set to true, the execution
+   proceeds as normal to the basic block that contains the statements
+   of the header; when the control variable is cleared, meaning that
+   the back-edge has been taken, the execution proceeds to the point
+   where the redirected back-edge was pointing.
+
+   The last step updates the SSA form after all the back-edges have
+   been redirected to the latch, and the new edges from the header to
+   the destination of back-edges have been created.
+
+   Another description of loop flattening in a very Fortran specific
+   way is in the 1992 paper by Reinhard von Hanxleden and Ken Kennedy:
+   "Relaxing SIMD Control Flow Constraints using Loop Transformations"
+   available from
+   http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 */
+
+/* Keep the loop structure for LOOP and remove all the loop structures
+   under LOOP.  */
+
+static void
+cancel_subloops (loop_p loop)
+{
+  int i;
+  loop_p li;
+  VEC (loop_p, heap) *lv = VEC_alloc (loop_p, heap, 3);
+
+  for (li = loop->inner; li; li = li->next)
+    VEC_safe_push (loop_p, heap, lv, li);
+
+  FOR_EACH_VEC_ELT (loop_p, lv, i, li)
+    cancel_loop_tree (li);
+
+  VEC_free (loop_p, heap, lv);
+}
+
+/* Before creating other phi nodes in LOOP->header for the control
+   flags, update the phi nodes of LOOP->header and add the necessary
+   phi nodes in the LOOP->latch that now contains several paths on
+   which the values are not updated.  PRED_E is the single edge that
+   was pointing to the LOOP->latch basic block before inner back-edges
+   were redirected to the LOOP->latch.  */
+
+static void
+update_loop_phi_nodes (loop_p loop, edge pred_e)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      edge e;
+      edge_iterator ei;
+      gimple phi = gsi_stmt (gsi);
+      tree back_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
+      tree res = gimple_phi_result (phi);
+      tree var = SSA_NAME_VAR (res);
+
+      phi = create_phi_node (var, loop->latch);
+      create_new_def_for (gimple_phi_result (phi), phi,
+			  gimple_phi_result_ptr (phi));
+
+      FOR_EACH_EDGE (e, ei, loop->latch->preds)
+	add_phi_arg (phi, (e == pred_e ? back_arg : res),
+		     e, UNKNOWN_LOCATION);
+
+      res = gimple_phi_result (phi);
+      add_phi_arg (gsi_stmt (gsi), res, loop_latch_edge (loop),
+		   UNKNOWN_LOCATION);
+    }
+}
+
+/* Creates a control flag for the FORWARDED_EDGE that represents the
+   back-edge that has been forwarded to the latch basic block of LOOP.
+   INNER_BODY is the basic block to which the back-edge was pointing
+   before redirection.  This function creates a boolean control flag
+   that is cleared when the FORWARDED_EDGE is taken and set for all
+   the other paths.  This function adds the corresponding phi nodes in
+   LOOP->latch and LOOP->header, and finally adds an edge from
+   LOOP->header to the INNER_BODY guarded by the control flag.  */
+
+static void
+create_control_flag (edge forwarded_edge, loop_p loop, basic_block inner_body)
+{
+  edge e, preheader;
+  edge outer_latch_e = loop_latch_edge (loop);
+  const char *name = "_flat_";
+  tree var = create_tmp_var (boolean_type_node, name);
+  tree res;
+  gimple phi, cond_stmt;
+  gimple_stmt_iterator gsi;
+  edge_iterator ei;
+
+  /* Adds a control variable for the redirected FORWARDED_EDGE.  */
+  add_referenced_var (var);
+  phi = create_phi_node (var, forwarded_edge->dest);
+  create_new_def_for (gimple_phi_result (phi), phi,
+		      gimple_phi_result_ptr (phi));
+
+  FOR_EACH_EDGE (e, ei, outer_latch_e->src->preds)
+    add_phi_arg (phi, (e == forwarded_edge
+		       ? boolean_false_node
+		       : boolean_true_node),
+		 e, UNKNOWN_LOCATION);
+  res = gimple_phi_result (phi);
+
+  /* Add a phi node in LOOP->header for the control variable.  */
+  phi = create_phi_node (var, loop->header);
+  create_new_def_for (gimple_phi_result (phi), phi,
+		      gimple_phi_result_ptr (phi));
+
+  preheader = loop_preheader_edge (loop);
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    add_phi_arg (phi, (e == preheader
+		       ? boolean_true_node
+		       : res),
+		 e, UNKNOWN_LOCATION);
+  res = gimple_phi_result (phi);
+
+  /* Split LOOP->header to insert the control variable condition.  */
+  e = split_block_after_labels (loop->header);
+  e->flags = EDGE_TRUE_VALUE;
+  e = make_edge (loop->header, inner_body, EDGE_FALSE_VALUE);
+  cond_stmt = gimple_build_cond (EQ_EXPR, res, boolean_true_node,
+				 NULL_TREE, NULL_TREE);
+  gsi = gsi_last_bb (loop->header);
+  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
+}
+
+/* Adds phi nodes to the LOOP->header and LOOP->latch for the ssa_name
+   NAME.  ARG is the argument of the latch phi node set for the
+   FORWARDED_EDGE, and all the other edges merged by the latch phi
+   node are set to the result of the LOOP->header phi node.  The latch
+   edge of the LOOP->header phi node is set to the result of the
+   LOOP->latch phi node, and the other argument is set to an arbitrary
+   valid value defined before the loop (note that this initial value
+   is never used in the loop).  Returns the LOOP->header phi result.  */
+
+static tree
+add_header_and_latch_phis (loop_p loop, tree name, edge forwarded_edge,
+			   tree arg)
+{
+  edge e;
+  edge_iterator ei;
+  tree res, zero, var = SSA_NAME_VAR (name);
+  gimple loop_phi = create_phi_node (var, loop->header);
+  gimple latch_phi = create_phi_node (var, loop->latch);
+
+  create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
+		      gimple_phi_result_ptr (loop_phi));
+  create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
+		      gimple_phi_result_ptr (latch_phi));
+
+  /* The value set to ZERO will never be used in the loop, however we
+     have to construct something meaningful for virtual SSA_NAMEs.  */
+  if (TREE_CODE (arg) != SSA_NAME)
+    zero = arg;
+  else if (is_gimple_reg (arg))
+    zero = fold_convert (TREE_TYPE (arg), integer_zero_node);
+  else
+    zero = gimple_default_def (cfun, SSA_NAME_VAR (arg));
+
+  res = gimple_phi_result (latch_phi);
+  FOR_EACH_EDGE (e, ei, loop->header->preds)
+    add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
+		 e, UNKNOWN_LOCATION);
+
+  res = gimple_phi_result (loop_phi);
+  FOR_EACH_EDGE (e, ei, loop->latch->preds)
+    add_phi_arg (latch_phi, (e == forwarded_edge ? arg : res),
+		 e, UNKNOWN_LOCATION);
+
+  return res;
+}
+
+/* Creates phi nodes for each inductive definition, i.e., loop phi
+   nodes.  For each induction phi node in the old loop header, i.e.,
+   in the single_succ (INNER_BODY), insert a phi node in the
+   LOOP->latch that takes the updated value of the induction on the
+   FORWARDED_EDGE, and maintains the same value as in the phi node of
+   the LOOP->header for all the other possible paths reaching
+   LOOP->latch.  This function has to be called after all the
+   back-edges have been redirected.  */
+
+static void
+update_inner_induction_phi_nodes (edge forwarded_edge, loop_p loop,
+				  basic_block inner_body)
+{
+  gimple_stmt_iterator gsi;
+
+  for (gsi = gsi_start_phis (single_succ (inner_body));
+       !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      gimple old_loop_phi = gsi_stmt (gsi);
+      tree back_arg = PHI_ARG_DEF_FROM_EDGE (old_loop_phi,
+					     single_succ_edge (inner_body));
+      tree res = gimple_phi_result (old_loop_phi);
+
+      res = add_header_and_latch_phis (loop, res, forwarded_edge, back_arg);
+      add_phi_arg (old_loop_phi, res, single_succ_edge (inner_body),
+		   UNKNOWN_LOCATION);
+    }
+}
+
+/* Renames all the uses of OLD_NAME with NEW_NAME (except the phi
+   nodes of DEF_BB) in all the basic blocks dominated by DEF_BB and in
+   the arguments of all the phi nodes originating in a basic block
+   that is dominated by DEF_BB.  */
+
+static void
+rename_dominated_uses (loop_p loop, tree old_name, tree new_name,
+		       basic_block def_bb)
+{
+  imm_use_iterator uit;
+  gimple stmt;
+  use_operand_p use_p;
+  ssa_op_iter op_iter;
+
+  FOR_EACH_IMM_USE_STMT (stmt, uit, old_name)
+    {
+      enum gimple_code code = gimple_code (stmt);
+      basic_block use_bb = gimple_bb (stmt);
+      edge_iterator ei;
+      edge e;
+
+      if (code == GIMPLE_PHI)
+	{
+	  FOR_EACH_EDGE (e, ei, use_bb->preds)
+	    if (PHI_ARG_DEF_FROM_EDGE (stmt, e) == old_name
+		&& dominated_by_p (CDI_DOMINATORS, e->src, def_bb)
+		&& use_bb != def_bb)
+	      replace_exp (gimple_phi_arg_imm_use_ptr (stmt, e->dest_idx),
+			   new_name);
+	}
+      else
+	{
+	  if (!dominated_by_p (CDI_DOMINATORS, use_bb, def_bb))
+	    continue;
+
+	  if (use_bb->loop_father == loop)
+	    {
+	      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
+		if (USE_FROM_PTR (use_p) == old_name)
+		  replace_exp (use_p, new_name);
+	    }
+	  else
+	    /* Virtual operands are not translated into loop closed
+	       SSA form, and thus they may occur in the rest of
+	       the program without a loop close vphi node.  */
+	    FOR_EACH_SSA_USE_OPERAND (use_p, stmt, op_iter, SSA_OP_ALL_USES)
+	      if (USE_FROM_PTR (use_p) == old_name)
+		replace_exp (use_p, new_name);
+	}
+    }
+}
+
+/* Helper function for add_missing_phi_nodes_1.  Adds to LOOP all the
+   missing phi nodes for NAME and updates the arguments of the
+   LATCH_PHI node.  LOOP_PHI node is the inductive definition of NAME
+   in LOOP->header.  */
+
+static void
+add_missing_phi_nodes_2 (loop_p loop, tree name, tree old_name,
+			 VEC (gimple, heap) *phis)
+{
+  unsigned i;
+  basic_block bb, dom_bb = gimple_bb (SSA_NAME_DEF_STMT (name));
+  VEC (basic_block, heap) *dom_bbs = get_all_dominated_blocks (CDI_DOMINATORS,
+							       dom_bb);
+
+  FOR_EACH_VEC_ELT (basic_block, dom_bbs, i, bb)
+    {
+      edge e;
+      edge_iterator ei;
+
+      if (bb == loop->latch
+	  || bb->loop_father != loop)
+	continue;
+
+      FOR_EACH_EDGE (e, ei, bb->succs)
+	{
+	  gimple phi = VEC_index (gimple, phis, e->dest->index);
+
+	  if (phi)
+	    add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
+
+	  else if (!single_pred_p (e->dest)
+		   && !dominated_by_p (CDI_DOMINATORS, e->dest, dom_bb)
+		   && e->dest->loop_father == loop)
+	  {
+	    tree var = SSA_NAME_VAR (name);
+
+	    phi = create_phi_node (var, e->dest);
+	    create_new_def_for (gimple_phi_result (phi), phi,
+				gimple_phi_result_ptr (phi));
+	    VEC_replace (gimple, phis, e->dest->index, phi);
+	    add_phi_arg (phi, name, e, UNKNOWN_LOCATION);
+	    rename_dominated_uses (loop, old_name, gimple_phi_result (phi),
+				   e->dest);
+	    add_missing_phi_nodes_2 (loop, gimple_phi_result (phi), old_name,
+				     phis);
+	  }
+	}
+    }
+}
+
+/* Helper function for add_missing_phi_nodes.  For all the definitions
+   of DEF_STMT add the missing phi nodes in LOOP.  */
+
+static void
+add_missing_phi_nodes_1 (loop_p loop, gimple def_stmt)
+{
+  def_operand_p def_p;
+  ssa_op_iter op_iter;
+  basic_block bb = gimple_bb (def_stmt);
+
+  FOR_EACH_PHI_OR_STMT_DEF (def_p, def_stmt, op_iter, SSA_OP_DEF|SSA_OP_VDEF)
+    {
+      edge e;
+      edge_iterator ei;
+      tree res, zero, var;
+      gimple loop_phi, latch_phi, use_stmt;
+      imm_use_iterator uit;
+      tree name = DEF_FROM_PTR (def_p);
+      bool needs_update = false;
+      VEC (gimple, heap) *phis;
+      int i;
+
+      FOR_EACH_IMM_USE_STMT (use_stmt, uit, name)
+	{
+	  basic_block use_bb = gimple_bb (use_stmt);
+
+	  if (!dominated_by_p (CDI_DOMINATORS, bb, use_bb))
+	    {
+	      needs_update = true;
+	      BREAK_FROM_IMM_USE_STMT (uit);
+	    }
+	}
+
+      if (!needs_update)
+	continue;
+
+      var = SSA_NAME_VAR (name);
+      loop_phi = create_phi_node (var, loop->header);
+      latch_phi = create_phi_node (var, loop->latch);
+
+      create_new_def_for (gimple_phi_result (loop_phi), loop_phi,
+			  gimple_phi_result_ptr (loop_phi));
+      create_new_def_for (gimple_phi_result (latch_phi), latch_phi,
+			  gimple_phi_result_ptr (latch_phi));
+
+      /* The value set to ZERO will never be used in the loop, however we
+	 have to construct something meaningful for virtual SSA_NAMEs.  */
+      if (is_gimple_reg (name))
+	zero = fold_convert (TREE_TYPE (name), integer_zero_node);
+      else
+	zero = gimple_default_def (cfun, SSA_NAME_VAR (name));
+
+      res = gimple_phi_result (latch_phi);
+      FOR_EACH_EDGE (e, ei, loop->header->preds)
+	add_phi_arg (loop_phi, (e == loop_latch_edge (loop) ? res : zero),
+		     e, UNKNOWN_LOCATION);
+
+      res = gimple_phi_result (loop_phi);
+      FOR_EACH_EDGE (e, ei, loop->latch->preds)
+	add_phi_arg (latch_phi, res, e, UNKNOWN_LOCATION);
+
+      phis = VEC_alloc (gimple, heap, n_basic_blocks);
+      for (i = 0; i < n_basic_blocks; i++)
+	VEC_quick_push (gimple, phis, NULL);
+
+      VEC_replace (gimple, phis, loop->latch->index, latch_phi);
+      VEC_replace (gimple, phis, loop->header->index, loop_phi);
+      add_missing_phi_nodes_2 (loop, name, name, phis);
+
+      for (i = 0; i < n_basic_blocks; i++)
+	{
+	  gimple phi = VEC_index (gimple, phis, i);
+
+	  if (!phi)
+	    continue;
+
+	  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (i)->preds)
+	    if (!PHI_ARG_DEF_FROM_EDGE (phi, e))
+	      add_phi_arg (phi, res, e, UNKNOWN_LOCATION);
+	}
+
+      VEC_free (gimple, heap, phis);
+    }
+}
+
+/* Walks over the code of LOOP and adds the missing phi nodes at
+   control flow junctions.  When a variable is defined in an outer
+   loop and used in an inner loop, the definition dominates the use.
+   After the loop flattening, the inner loop body is directly
+   reachable from the LOOP->header by using the added edge guarded by
+   the boolean flag that controls the execution of the back-edge that
+   was eliminated.  In this case, the use is not dominated by the
+   definition, and this function adds the missing phi nodes.  */
+
+static void
+add_missing_phi_nodes (loop_p loop)
+{
+  gimple_stmt_iterator gsi;
+  int i, n = loop->num_nodes;
+  basic_block *bbs = get_loop_body (loop);
+
+  for (i = 0; i < n; i++)
+    {
+      basic_block bb = bbs[i];
+
+      /* LOOP->header dominates all the blocks of the loop body, and
+	 so we don't have to look at the missing phi nodes for the
+	 definitions of LOOP->header.  */
+      if (bb == loop->header)
+	continue;
+
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	if (!gimple_nop_p (gsi_stmt (gsi)))
+	  add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
+
+      for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	add_missing_phi_nodes_1 (loop, gsi_stmt (gsi));
+    }
+
+  free (bbs);
+}
+
+/* Removes all the back-edges of LOOP except its own back-edge.  */
+
+static unsigned
+flatten_loop (loop_p loop)
+{
+  int i, n = loop->num_nodes;
+  basic_block *bbs;
+  VEC (edge, heap) *back_edges;
+  VEC (basic_block, heap) *loop_body;
+  edge_iterator ei;
+  edge e, pred_e;
+  unsigned max_nb_basic_blocks = PARAM_VALUE (PARAM_LFLAT_MAX_NB_BBS);;
+
+  if (loop->num_nodes > max_nb_basic_blocks
+      || !single_exit (loop)
+      || !dbg_cnt (lflat))
+    return 0;
+
+  mark_dfs_back_edges ();
+  bbs = get_loop_body (loop);
+
+  back_edges = VEC_alloc (edge, heap, 3);
+  loop_body = VEC_alloc (basic_block, heap, 3);
+
+  for (i = 0; i < n; i++)
+    FOR_EACH_EDGE (e, ei, bbs[i]->succs)
+      if (e->flags & EDGE_DFS_BACK
+	  && e->src != loop->latch)
+	VEC_safe_push (edge, heap, back_edges, e);
+
+  free (bbs);
+
+  /* Early return and do not modify the code when there are no back
+     edges.  */
+  if (VEC_empty (edge, back_edges))
+    return 0;
+
+  cancel_subloops (loop);
+
+  /* Split the latch edge to make sure that the latch basic block does
+     not contain code.  */
+  loop->latch = split_edge (loop_latch_edge (loop));
+  pred_e = single_pred_edge (loop->latch);
+
+  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
+    {
+      basic_block dest = split_edge (e);
+
+      /* Redirect BACK_EDGE to LOOP->latch.  */
+      redirect_edge_and_branch_force (e, loop->latch);
+
+      /* Save the basic block where it was pointing.  */
+      VEC_safe_push (basic_block, heap, loop_body, dest);
+    }
+
+  update_loop_phi_nodes (loop, pred_e);
+
+  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
+    create_control_flag (e, loop, VEC_index (basic_block, loop_body, i));
+
+  FOR_EACH_VEC_ELT (edge, back_edges, i, e)
+    update_inner_induction_phi_nodes (e, loop, VEC_index (basic_block,
+							  loop_body, i));
+
+  free_dominance_info (CDI_DOMINATORS);
+  calculate_dominance_info (CDI_DOMINATORS);
+  add_missing_phi_nodes (loop);
+
+  /* If we redirected some back-edges, split the latch edge to create
+     an empty LOOP->latch.  */
+  if (!single_pred_p (loop->latch))
+    loop->latch = split_edge (loop_latch_edge (loop));
+
+  return TODO_update_ssa | TODO_verify_ssa;
+}
+
+/* Flattens all the loops of the current function.  */
+
+static unsigned int
+tree_loop_flattening (void)
+{
+  unsigned todo = 0;
+  loop_p loop;
+  loop_iterator li;
+
+  if (number_of_loops () <= 1)
+    return 0;
+
+  FOR_EACH_LOOP (li, loop, 0)
+    todo |= flatten_loop (loop);
+
+#ifdef ENABLE_CHECKING
+  verify_dominators (CDI_DOMINATORS);
+  verify_flow_info ();
+#endif
+
+  cleanup_tree_cfg ();
+  return todo;
+}
+
+static bool
+gate_tree_loop_flattening (void)
+{
+  return flag_tree_loop_flattening != 0;
+}
+
+struct gimple_opt_pass pass_flatten_loops =
+{
+ {
+  GIMPLE_PASS,
+  "lflat",				/* name */
+  gate_tree_loop_flattening,		/* gate */
+  tree_loop_flattening,       		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_TREE_LOOP_FLATTENING,  		/* tv_id */
+  PROP_cfg | PROP_ssa,			/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func
+    | TODO_update_ssa
+    | TODO_ggc_collect			/* todo_flags_finish */
+ }
+};
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a87a770..e2f257f 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -374,6 +374,7 @@  extern struct gimple_opt_pass pass_graphite;
 extern struct gimple_opt_pass pass_graphite_transforms;
 extern struct gimple_opt_pass pass_if_conversion;
 extern struct gimple_opt_pass pass_loop_distribution;
+extern struct gimple_opt_pass pass_flatten_loops;
 extern struct gimple_opt_pass pass_vectorize;
 extern struct gimple_opt_pass pass_slp_vectorize;
 extern struct gimple_opt_pass pass_complete_unroll;