
Gimple loop splitting v2

Message ID alpine.LSU.2.20.1512021420420.13533@wotan.suse.de
State New

Commit Message

Michael Matz Dec. 2, 2015, 1:23 p.m. UTC
Hi,

On Tue, 1 Dec 2015, Jeff Law wrote:

> > So, okay for trunk?
> -ENOPATCH

Sigh :)
Here it is.


Ciao,
Michael.
	* common.opt (-fsplit-loops): New flag.
	* passes.def (pass_loop_split): Add.
	* opts.c (default_options_table): Add OPT_fsplit_loops entry at -O3.
	(enable_fdo_optimizations): Add loop splitting.
	* timevar.def (TV_LOOP_SPLIT): Add.
	* tree-pass.h (make_pass_loop_split): Declare.
	* tree-ssa-loop-manip.h (rewrite_into_loop_closed_ssa_1): Declare.
	* tree-ssa-loop-unswitch.c: Include tree-ssa-loop-manip.h,
	* tree-ssa-loop-split.c: New file.
	* Makefile.in (OBJS): Add tree-ssa-loop-split.o.
	* doc/invoke.texi (fsplit-loops): Document.
	* doc/passes.texi (Loop optimization): Add paragraph about loop
	splitting.

testsuite/
	* gcc.dg/loop-split.c: New test.
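
To see the pass in action, here is a minimal illustrative example (not part
of the patch; function and array names are made up) of a loop the new pass
should split:

  extern int a[], b[];

  int
  sum (int n, int border)
  {
    int s = 0;
    for (int i = 0; i < n; i++)
      {
        if (i < border)   /* true for the first part of the iteration space */
          s += a[i];
        else              /* false for the rest */
          s += b[i];
      }
    return s;
  }

Compile with -O3 (which now enables -fsplit-loops) or with
-O2 -fsplit-loops, add -fdump-tree-lsplit-details, and look for
";; Loop split." in the lsplit dump.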

Comments

Jeff Law Dec. 5, 2015, 7:55 a.m. UTC | #1
On 12/02/2015 06:23 AM, Michael Matz wrote:
> Hi,
>
> On Tue, 1 Dec 2015, Jeff Law wrote:
>
>>> So, okay for trunk?
>> -ENOPATCH
>
> Sigh :)
> Here it is.
>
>
> Ciao,
> Michael.
> 	* common.opt (-fsplit-loops): New flag.
> 	* passes.def (pass_loop_split): Add.
> 	* opts.c (default_options_table): Add OPT_fsplit_loops entry at -O3.
> 	(enable_fdo_optimizations): Add loop splitting.
> 	* timevar.def (TV_LOOP_SPLIT): Add.
> 	* tree-pass.h (make_pass_loop_split): Declare.
> 	* tree-ssa-loop-manip.h (rewrite_into_loop_closed_ssa_1): Declare.
> 	* tree-ssa-loop-unswitch.c: Include tree-ssa-loop-manip.h,
> 	* tree-ssa-loop-split.c: New file.
> 	* Makefile.in (OBJS): Add tree-ssa-loop-split.o.
> 	* doc/invoke.texi (fsplit-loops): Document.
> 	* doc/passes.texi (Loop optimization): Add paragraph about loop
> 	splitting.
>
> testsuite/
> 	* gcc.dg/loop-split.c: New test.
>
> Index: tree-ssa-loop-split.c

> +/* Return true when BB inside LOOP is a potential iteration space
> +   split point, i.e. ends with a condition like "IV < comp", which
> +   is true on one side of the iteration space and false on the other,
> +   and the split point can be computed.  If so, also return the border
> +   point in *BORDER and the comparison induction variable in IV.  */
> +
> +static tree
> +split_at_bb_p (struct loop *loop, basic_block bb, tree *border, affine_iv *iv)
> +{
> +  gimple *last;
> +  gcond *stmt;
> +  affine_iv iv2;
> +
> +  /* Make it so, that the first argument of the condition is
> +     the looping one (only swap.  */
Nit.  I don't think you want a comma after "so".  And it looks like your 
comment got truncated as well.

With the comment above fixed, this is fine for the trunk.

jeff
Michael Matz Oct. 20, 2016, 2:43 p.m. UTC | #2
Hi,

On Sat, 5 Dec 2015, Jeff Law wrote:

> Nit.  I don't think you want a comma after "so".  And it looks like your
> comment got truncated as well.
> 
> With the comment above fixed, this is fine for the trunk.

I'm terribly sorry to have dropped the ball here, but I've committed this 
now after not even a year ;-/ (r241374)  Obviously after rebootstrapping 
with all,ada languages.  I also did some benchmark runs, which should be 
taken with a grain of salt as the machine had fairly variable results, but 
the improvements are real, though perhaps not always in that range (it's a 
normal three-repeat run).  I'm really curious if our automatic tester can 
pick up similar improvements, because if so, it's extreme (5 to 15 percent 
in some benchmarks) and we can brag about it for GCC 7 ;-)

400.perlbench    9770        519       18.8 *    9770   508       19.2 *  
401.bzip2        9650        668       14.5 *    9650   666       14.5 *  
403.gcc          8050        455       17.7 *    8050   432       18.6 *  
429.mcf          9120        477       19.1 *    9120   467       19.5 *  
445.gobmk       10490        643       16.3 *   10490   644       16.3 *  
456.hmmer        9330        641       14.6 *    9330   614       15.2 *  
458.sjeng       12100        784       15.4 *   12100   762       15.9 *  
462.libquantum  20720        605       34.2 *   20720   600       34.5 *  
464.h264ref     22130        969       22.8 *   22130   969       22.8 *  
471.omnetpp      6250        438       14.3 *    6250   358       17.5 *  
473.astar        7020        494       14.2 *    7020   492       14.3 *  
483.xalancbmk    6900        342       20.2 *    6900   336       20.6 *  
 Est. SPECint(R)_base2006              17.9
 Est. SPECint2006                                                 18.5

410.bwaves      13590        563       24.1 *   13590   506       26.9 *  
416.gamess                                  NR                         NR 
433.milc         9180        375       24.5 *    9180   349       26.3 *  
434.zeusmp       9100        433       21.0 *    9100   423       21.5 *  
435.gromacs      7140        402       17.7 *    7140   411       17.4 *  
436.cactusADM   11950        486       24.6 *   11950   486       24.6 *  
437.leslie3d     9400        421       22.4 *    9400   419       22.4 *  
444.namd         8020        520       15.4 *    8020   520       15.4 *  
447.dealII                                  NR                         NR 
450.soplex       8340        393       21.2 *    8340   391       21.3 *  
453.povray       5320        277       19.2 *    5320   278       19.1 *  
454.calculix     8250        453       18.2 *    8250   460       17.9 *  
459.GemsFDTD    10610        542       19.6 *   10610   537       19.8 *  
465.tonto        9840        492       20.0 *    9840   491       20.0 *  
470.lbm         13740        466       29.5 *   13740   430       32.0 *  
481.wrf         11170        492       22.7 *   11170   457       24.4 *  
482.sphinx3     19490        659       29.6 *   19490   655       29.8 *  
 Est. SPECfp(R)_base2006               21.6
 Est. SPECfp2006                                                  22.1


Ciao,
Michael.
Bin.Cheng Oct. 20, 2016, 2:55 p.m. UTC | #3
On Thu, Oct 20, 2016 at 3:43 PM, Michael Matz <matz@suse.de> wrote:
> Hi,
>
> On Sat, 5 Dec 2015, Jeff Law wrote:
>
>> Nit.  I don't think you want a comma after "so".  And it looks like your
>> comment got truncated as well.
>>
>> With the comment above fixed, this is fine for the trunk.
>
> I'm terribly sorry to have dropped the ball here, but I've committed this
> now after not even a year ;-/ (r241374)  Obviously after rebootstrapping
> with all,ada languages.  I also did some benchmark run which should be
> taken with a grain of salt as the machine had fairly variant results but
> the improvements are real, though perhaps not always in that range (it's a
> normal three repeats run).  I'm really curious if our automatic tester can
> pick up similar improvements, because if so, it's extreme (5 to 15 percent
> in some benchmarks) and we can brag about it for GCC 7 ;-)
This is nice, thanks for doing it.  I will check the improvement on AArch64.

Thanks,
bin
>
> 400.perlbench    9770        519       18.8 *    9770   508       19.2 *
> 401.bzip2        9650        668       14.5 *    9650   666       14.5 *
> 403.gcc          8050        455       17.7 *    8050   432       18.6 *
> 429.mcf          9120        477       19.1 *    9120   467       19.5 *
> 445.gobmk       10490        643       16.3 *   10490   644       16.3 *
> 456.hmmer        9330        641       14.6 *    9330   614       15.2 *
> 458.sjeng       12100        784       15.4 *   12100   762       15.9 *
> 462.libquantum  20720        605       34.2 *   20720   600       34.5 *
> 464.h264ref     22130        969       22.8 *   22130   969       22.8 *
> 471.omnetpp      6250        438       14.3 *    6250   358       17.5 *
> 473.astar        7020        494       14.2 *    7020   492       14.3 *
> 483.xalancbmk    6900        342       20.2 *    6900   336       20.6 *
>  Est. SPECint(R)_base2006              17.9
>  Est. SPECint2006                                                 18.5
>
> 410.bwaves      13590        563       24.1 *   13590   506       26.9 *
> 416.gamess                                  NR                         NR
> 433.milc         9180        375       24.5 *    9180   349       26.3 *
> 434.zeusmp       9100        433       21.0 *    9100   423       21.5 *
> 435.gromacs      7140        402       17.7 *    7140   411       17.4 *
> 436.cactusADM   11950        486       24.6 *   11950   486       24.6 *
> 437.leslie3d     9400        421       22.4 *    9400   419       22.4 *
> 444.namd         8020        520       15.4 *    8020   520       15.4 *
> 447.dealII                                  NR                         NR
> 450.soplex       8340        393       21.2 *    8340   391       21.3 *
> 453.povray       5320        277       19.2 *    5320   278       19.1 *
> 454.calculix     8250        453       18.2 *    8250   460       17.9 *
> 459.GemsFDTD    10610        542       19.6 *   10610   537       19.8 *
> 465.tonto        9840        492       20.0 *    9840   491       20.0 *
> 470.lbm         13740        466       29.5 *   13740   430       32.0 *
> 481.wrf         11170        492       22.7 *   11170   457       24.4 *
> 482.sphinx3     19490        659       29.6 *   19490   655       29.8 *
>  Est. SPECfp(R)_base2006               21.6
>  Est. SPECfp2006                                                  22.1
>
>
> Ciao,
> Michael.
Jeff Law Oct. 20, 2016, 7:17 p.m. UTC | #4
On 10/20/2016 08:43 AM, Michael Matz wrote:
> Hi,
>
> On Sat, 5 Dec 2015, Jeff Law wrote:
>
>> Nit.  I don't think you want a comma after "so".  And it looks like your
>> comment got truncated as well.
>>
>> With the comment above fixed, this is fine for the trunk.
>
> I'm terribly sorry to have dropped the ball here, but I've committed this
> now after not even a year ;-/ (r241374)
It'd totally fallen off my radar.  I had to go find it in my archives :-).

>   Obviously after rebootstrapping
> with all,ada languages.  I also did some benchmark run which should be
> taken with a grain of salt as the machine had fairly variant results but
> the improvements are real, though perhaps not always in that range (it's a
> normal three repeats run).  I'm really curious if our automatic tester can
> pick up similar improvements, because if so, it's extreme (5 to 15 percent
> in some benchmarks) and we can brag about it for GCC 7 ;-)
Yea.  I don't expect it applies that often and ISTM that it's probably 
most beneficial by enabling other stuff later in the loop optimizer 
pipeline to see more loops without embedded flow control.
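
For instance, roughly (a hand-written before/after sketch, not actual pass
output; function and variable names are made up):

  void
  before (int *a, int *b, int n, int border)
  {
    for (int i = 0; i < n; i++)
      {
        if (i < border)
          a[i] += 1;
        else
          b[i] += 1;
      }
  }

  void
  after (int *a, int *b, int n, int border)
  {
    int i;
    int newend = border < n ? border : n;  /* min (border, n), cf. compute_new_first_bound */
    for (i = 0; i < newend; i++)
      a[i] += 1;    /* the condition is always true here */
    for (; i < n; i++)
      b[i] += 1;    /* and always false here */
  }

Each half is then a plain counted loop with no control flow in the body,
which e.g. the vectorizer has a much easier time with.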


Anyway, glad to see it go in.

jeff
Bin.Cheng Oct. 24, 2016, 8:44 a.m. UTC | #5
On Thu, Oct 20, 2016 at 3:55 PM, Bin.Cheng <amker.cheng@gmail.com> wrote:
> On Thu, Oct 20, 2016 at 3:43 PM, Michael Matz <matz@suse.de> wrote:
>> Hi,
>>
>> On Sat, 5 Dec 2015, Jeff Law wrote:
>>
>>> Nit.  I don't think you want a comma after "so".  And it looks like your
>>> comment got truncated as well.
>>>
>>> With the comment above fixed, this is fine for the trunk.
>>
>> I'm terribly sorry to have dropped the ball here, but I've committed this
>> now after not even a year ;-/ (r241374)  Obviously after rebootstrapping
>> with all,ada languages.  I also did some benchmark run which should be
>> taken with a grain of salt as the machine had fairly variant results but
>> the improvements are real, though perhaps not always in that range (it's a
>> normal three repeats run).  I'm really curious if our automatic tester can
>> pick up similar improvements, because if so, it's extreme (5 to 15 percent
>> in some benchmarks) and we can brag about it for GCC 7 ;-)
> This is nice, thanks for doing it.  I will check the improvement on AArch64.
Hi,
Unfortunately I didn't reproduce the improvement in my run on AArch64,
I will double check if I made some mistakes.

Thanks,
bin
>>
>> 400.perlbench    9770        519       18.8 *    9770   508       19.2 *
>> 401.bzip2        9650        668       14.5 *    9650   666       14.5 *
>> 403.gcc          8050        455       17.7 *    8050   432       18.6 *
>> 429.mcf          9120        477       19.1 *    9120   467       19.5 *
>> 445.gobmk       10490        643       16.3 *   10490   644       16.3 *
>> 456.hmmer        9330        641       14.6 *    9330   614       15.2 *
>> 458.sjeng       12100        784       15.4 *   12100   762       15.9 *
>> 462.libquantum  20720        605       34.2 *   20720   600       34.5 *
>> 464.h264ref     22130        969       22.8 *   22130   969       22.8 *
>> 471.omnetpp      6250        438       14.3 *    6250   358       17.5 *
>> 473.astar        7020        494       14.2 *    7020   492       14.3 *
>> 483.xalancbmk    6900        342       20.2 *    6900   336       20.6 *
>>  Est. SPECint(R)_base2006              17.9
>>  Est. SPECint2006                                                 18.5
>>
>> 410.bwaves      13590        563       24.1 *   13590   506       26.9 *
>> 416.gamess                                  NR                         NR
>> 433.milc         9180        375       24.5 *    9180   349       26.3 *
>> 434.zeusmp       9100        433       21.0 *    9100   423       21.5 *
>> 435.gromacs      7140        402       17.7 *    7140   411       17.4 *
>> 436.cactusADM   11950        486       24.6 *   11950   486       24.6 *
>> 437.leslie3d     9400        421       22.4 *    9400   419       22.4 *
>> 444.namd         8020        520       15.4 *    8020   520       15.4 *
>> 447.dealII                                  NR                         NR
>> 450.soplex       8340        393       21.2 *    8340   391       21.3 *
>> 453.povray       5320        277       19.2 *    5320   278       19.1 *
>> 454.calculix     8250        453       18.2 *    8250   460       17.9 *
>> 459.GemsFDTD    10610        542       19.6 *   10610   537       19.8 *
>> 465.tonto        9840        492       20.0 *    9840   491       20.0 *
>> 470.lbm         13740        466       29.5 *   13740   430       32.0 *
>> 481.wrf         11170        492       22.7 *   11170   457       24.4 *
>> 482.sphinx3     19490        659       29.6 *   19490   655       29.8 *
>>  Est. SPECfp(R)_base2006               21.6
>>  Est. SPECfp2006                                                  22.1
>>
>>
>> Ciao,
>> Michael.
Michael Matz Oct. 24, 2016, 9:02 a.m. UTC | #6
Hi,

On Mon, 24 Oct 2016, Bin.Cheng wrote:

> Unfortunately I didn't reproduce the improvement in my run on AArch64, I 
> will double check if I made some mistakes.

Yeah, our regular testers also didn't pick up these kinds of improvements.  
As I said, the machine was quite jumpy (though not loaded at all, and 
fixated to run on one CPU) :-/


Ciao,
Michael.
Tamar Christina Oct. 25, 2016, 4:41 p.m. UTC | #7
Hi Michael,

The commit seems to be causing an ICE on aarch64 (just tested latest trunk).

I've created a Bugzilla ticket with a test input https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78107

Regards,
Tamar

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Michael Matz
> Sent: 24 October 2016 10:02
> To: Bin.Cheng
> Cc: Jeff Law; gcc-patches List
> Subject: Re: Gimple loop splitting v2
> 
> Hi,
> 
> On Mon, 24 Oct 2016, Bin.Cheng wrote:
> 
> > Unfortunately I didn't reproduce the improvement in my run on AArch64,
> > I will double check if I made some mistakes.
> 
> Yeah, our regular testers also didn't pick up these kinds of improvements.
> As I said, the machine was quite jumpy (though not loaded at all, and fixated
> to run on one CPU) :-/
> 
> 
> Ciao,
> Michael.

Patch

Index: common.opt
===================================================================
--- common.opt	(revision 231115)
+++ common.opt	(working copy)
@@ -2453,6 +2457,10 @@  funswitch-loops
 Common Report Var(flag_unswitch_loops) Optimization
 Perform loop unswitching.
 
+fsplit-loops
+Common Report Var(flag_split_loops) Optimization
+Perform loop splitting.
+
 funwind-tables
 Common Report Var(flag_unwind_tables) Optimization
 Just generate unwind tables for exception handling.
Index: passes.def
===================================================================
--- passes.def	(revision 231115)
+++ passes.def	(working copy)
@@ -252,6 +252,7 @@  along with GCC; see the file COPYING3.
 	  NEXT_PASS (pass_dce);
 	  NEXT_PASS (pass_tree_unswitch);
 	  NEXT_PASS (pass_scev_cprop);
+	  NEXT_PASS (pass_loop_split);
 	  NEXT_PASS (pass_record_bounds);
 	  NEXT_PASS (pass_loop_distribution);
 	  NEXT_PASS (pass_copy_prop);
Index: opts.c
===================================================================
--- opts.c	(revision 231115)
+++ opts.c	(working copy)
@@ -532,6 +532,7 @@  static const struct default_options defa
        regardless of them being declared inline.  */
     { OPT_LEVELS_3_PLUS_AND_SIZE, OPT_finline_functions, NULL, 1 },
     { OPT_LEVELS_1_PLUS_NOT_DEBUG, OPT_finline_functions_called_once, NULL, 1 },
+    { OPT_LEVELS_3_PLUS, OPT_fsplit_loops, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_funswitch_loops, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
     { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
@@ -1411,6 +1412,8 @@  enable_fdo_optimizations (struct gcc_opt
     opts->x_flag_ipa_cp_alignment = value;
   if (!opts_set->x_flag_predictive_commoning)
     opts->x_flag_predictive_commoning = value;
+  if (!opts_set->x_flag_split_loops)
+    opts->x_flag_split_loops = value;
   if (!opts_set->x_flag_unswitch_loops)
     opts->x_flag_unswitch_loops = value;
   if (!opts_set->x_flag_gcse_after_reload)
Index: timevar.def
===================================================================
--- timevar.def	(revision 231115)
+++ timevar.def	(working copy)
@@ -182,6 +182,7 @@  DEFTIMEVAR (TV_LIM                   , "
 DEFTIMEVAR (TV_TREE_LOOP_IVCANON     , "tree canonical iv")
 DEFTIMEVAR (TV_SCEV_CONST            , "scev constant prop")
 DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH    , "tree loop unswitching")
+DEFTIMEVAR (TV_LOOP_SPLIT            , "loop splitting")
 DEFTIMEVAR (TV_COMPLETE_UNROLL       , "complete unrolling")
 DEFTIMEVAR (TV_TREE_PARALLELIZE_LOOPS, "tree parallelize loops")
 DEFTIMEVAR (TV_TREE_VECTORIZATION    , "tree vectorization")
Index: tree-pass.h
===================================================================
--- tree-pass.h	(revision 231115)
+++ tree-pass.h	(working copy)
@@ -370,6 +370,7 @@  extern gimple_opt_pass *make_pass_tree_n
 extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_predcom (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_iv_canon (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_scev_cprop (gcc::context *ctxt);
Index: tree-ssa-loop-manip.h
===================================================================
--- tree-ssa-loop-manip.h	(revision 231115)
+++ tree-ssa-loop-manip.h	(working copy)
@@ -24,6 +24,8 @@  typedef void (*transform_callback)(struc
 
 extern void create_iv (tree, tree, tree, struct loop *, gimple_stmt_iterator *,
 		       bool, tree *, tree *);
+extern void rewrite_into_loop_closed_ssa_1 (bitmap, unsigned, int,
+					    struct loop *);
 extern void rewrite_into_loop_closed_ssa (bitmap, unsigned);
 extern void rewrite_virtuals_into_loop_closed_ssa (struct loop *);
 extern void verify_loop_closed_ssa (bool);
Index: Makefile.in
===================================================================
--- Makefile.in	(revision 231115)
+++ Makefile.in	(working copy)
@@ -1474,6 +1474,7 @@  OBJS = \
 	tree-ssa-loop-manip.o \
 	tree-ssa-loop-niter.o \
 	tree-ssa-loop-prefetch.o \
+	tree-ssa-loop-split.o \
 	tree-ssa-loop-unswitch.o \
 	tree-ssa-loop.o \
 	tree-ssa-math-opts.o \
Index: tree-ssa-loop-split.c
===================================================================
--- tree-ssa-loop-split.c	(revision 0)
+++ tree-ssa-loop-split.c	(working copy)
@@ -0,0 +1,686 @@ 
+/* Loop splitting.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "fold-const.h"
+#include "tree-cfg.h"
+#include "tree-ssa.h"
+#include "tree-ssa-loop-niter.h"
+#include "tree-ssa-loop.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-into-ssa.h"
+#include "cfgloop.h"
+#include "tree-scalar-evolution.h"
+#include "gimple-iterator.h"
+#include "gimple-pretty-print.h"
+#include "cfghooks.h"
+#include "gimple-fold.h"
+#include "gimplify-me.h"
+
+/* This file implements loop splitting, i.e. transformation of loops like
+
+   for (i = 0; i < 100; i++)
+     {
+       if (i < 50)
+         A;
+       else
+         B;
+     }
+
+   into:
+
+   for (i = 0; i < 50; i++)
+     {
+       A;
+     }
+   for (; i < 100; i++)
+     {
+       B;
+     }
+
+   */
+
+/* Return true when BB inside LOOP is a potential iteration space
+   split point, i.e. ends with a condition like "IV < comp", which
+   is true on one side of the iteration space and false on the other,
+   and the split point can be computed.  If so, also return the border
+   point in *BORDER and the comparison induction variable in IV.  */
+
+static tree
+split_at_bb_p (struct loop *loop, basic_block bb, tree *border, affine_iv *iv)
+{
+  gimple *last;
+  gcond *stmt;
+  affine_iv iv2;
+
+  /* BB must end in a simple conditional jump.  */
+  last = last_stmt (bb);
+  if (!last || gimple_code (last) != GIMPLE_COND)
+    return NULL_TREE;
+  stmt = as_a <gcond *> (last);
+
+  enum tree_code code = gimple_cond_code (stmt);
+
+  /* Only handle relational comparisons; for equality and non-equality
+     we'd have to split the loop into two loops and a middle statement.  */
+  switch (code)
+    {
+      case LT_EXPR:
+      case LE_EXPR:
+      case GT_EXPR:
+      case GE_EXPR:
+	break;
+      default:
+	return NULL_TREE;
+    }
+
+  if (loop_exits_from_bb_p (loop, bb))
+    return NULL_TREE;
+
+  tree op0 = gimple_cond_lhs (stmt);
+  tree op1 = gimple_cond_rhs (stmt);
+
+  if (!simple_iv (loop, loop, op0, iv, false))
+    return NULL_TREE;
+  if (!simple_iv (loop, loop, op1, &iv2, false))
+    return NULL_TREE;
+
+  /* Make it so, that the first argument of the condition is
+     the looping one (only swap.  */
+  if (!integer_zerop (iv2.step))
+    {
+      std::swap (op0, op1);
+      std::swap (*iv, iv2);
+      code = swap_tree_comparison (code);
+      gimple_cond_set_condition (stmt, code, op0, op1);
+      update_stmt (stmt);
+    }
+  else if (integer_zerop (iv->step))
+    return NULL_TREE;
+  if (!integer_zerop (iv2.step))
+    return NULL_TREE;
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      fprintf (dump_file, "Found potential split point: ");
+      print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM);
+      fprintf (dump_file, " { ");
+      print_generic_expr (dump_file, iv->base, TDF_SLIM);
+      fprintf (dump_file, " + I*");
+      print_generic_expr (dump_file, iv->step, TDF_SLIM);
+      fprintf (dump_file, " } %s ", get_tree_code_name (code));
+      print_generic_expr (dump_file, iv2.base, TDF_SLIM);
+      fprintf (dump_file, "\n");
+    }
+
+  *border = iv2.base;
+  return op0;
+}
+
+/* Given a GUARD conditional stmt inside LOOP, which we want to make always
+   true or false depending on INITIAL_TRUE, and adjusted values NEXTVAL
+   (a post-increment IV) and NEWBOUND (the comparator) adjust the loop
+   exit test statement to loop back only if the GUARD statement will
+   also be true/false in the next iteration.  */
+
+static void
+patch_loop_exit (struct loop *loop, gcond *guard, tree nextval, tree newbound,
+		 bool initial_true)
+{
+  edge exit = single_exit (loop);
+  gcond *stmt = as_a <gcond *> (last_stmt (exit->src));
+  gimple_cond_set_condition (stmt, gimple_cond_code (guard),
+			     nextval, newbound);
+  update_stmt (stmt);
+
+  edge stay = single_pred_edge (loop->latch);
+
+  exit->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+  stay->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE);
+
+  if (initial_true)
+    {
+      exit->flags |= EDGE_FALSE_VALUE;
+      stay->flags |= EDGE_TRUE_VALUE;
+    }
+  else
+    {
+      exit->flags |= EDGE_TRUE_VALUE;
+      stay->flags |= EDGE_FALSE_VALUE;
+    }
+}
+
+/* Given an induction variable GUARD_IV, and its affine descriptor IV,
+   find the loop phi node in LOOP defining it directly, or create
+   such phi node.  Return that phi node.  */
+
+static gphi *
+find_or_create_guard_phi (struct loop *loop, tree guard_iv, affine_iv * /*iv*/)
+{
+  gimple *def = SSA_NAME_DEF_STMT (guard_iv);
+  gphi *phi;
+  if ((phi = dyn_cast <gphi *> (def))
+      && gimple_bb (phi) == loop->header)
+    return phi;
+
+  /* XXX Create the PHI instead.  */
+  return NULL;
+}
+
+/* This function updates the SSA form after connect_loops made a new
+   edge NEW_E leading from LOOP1 exit to LOOP2 (via an intermediate
+   conditional).  I.e. the second loop can now be entered either
+   via the original entry or via NEW_E, so the entry values of LOOP2
+   phi nodes are either the original ones or those at the exit
+   of LOOP1.  Insert new phi nodes in LOOP2 pre-header reflecting
+   this.  */
+
+static void
+connect_loop_phis (struct loop *loop1, struct loop *loop2, edge new_e)
+{
+  basic_block rest = loop_preheader_edge (loop2)->src;
+  gcc_assert (new_e->dest == rest);
+  edge skip_first = EDGE_PRED (rest, EDGE_PRED (rest, 0) == new_e);
+
+  edge firste = loop_preheader_edge (loop1);
+  edge seconde = loop_preheader_edge (loop2);
+  edge firstn = loop_latch_edge (loop1);
+  gphi_iterator psi_first, psi_second;
+  for (psi_first = gsi_start_phis (loop1->header),
+       psi_second = gsi_start_phis (loop2->header);
+       !gsi_end_p (psi_first);
+       gsi_next (&psi_first), gsi_next (&psi_second))
+    {
+      tree init, next, new_init;
+      use_operand_p op;
+      gphi *phi_first = psi_first.phi ();
+      gphi *phi_second = psi_second.phi ();
+
+      init = PHI_ARG_DEF_FROM_EDGE (phi_first, firste);
+      next = PHI_ARG_DEF_FROM_EDGE (phi_first, firstn);
+      op = PHI_ARG_DEF_PTR_FROM_EDGE (phi_second, seconde);
+      gcc_assert (operand_equal_for_phi_arg_p (init, USE_FROM_PTR (op)));
+
+      /* Prefer using original variable as a base for the new ssa name.
+	 This is necessary for virtual ops, and useful in order to avoid
+	 losing debug info for real ops.  */
+      if (TREE_CODE (next) == SSA_NAME
+	  && useless_type_conversion_p (TREE_TYPE (next),
+					TREE_TYPE (init)))
+	new_init = copy_ssa_name (next);
+      else if (TREE_CODE (init) == SSA_NAME
+	       && useless_type_conversion_p (TREE_TYPE (init),
+					     TREE_TYPE (next)))
+	new_init = copy_ssa_name (init);
+      else if (useless_type_conversion_p (TREE_TYPE (next),
+					  TREE_TYPE (init)))
+	new_init = make_temp_ssa_name (TREE_TYPE (next), NULL,
+				       "unrinittmp");
+      else
+	new_init = make_temp_ssa_name (TREE_TYPE (init), NULL,
+				       "unrinittmp");
+
+      gphi * newphi = create_phi_node (new_init, rest);
+      add_phi_arg (newphi, init, skip_first, UNKNOWN_LOCATION);
+      add_phi_arg (newphi, next, new_e, UNKNOWN_LOCATION);
+      SET_USE (op, new_init);
+    }
+}
+
+/* The two loops LOOP1 and LOOP2 were just created by loop versioning,
+   they are still equivalent and placed in two arms of a diamond, like so:
+
+               .------if (cond)------.
+               v                     v
+             pre1                   pre2
+              |                      |
+        .--->h1                     h2<----.
+        |     |                      |     |
+        |    ex1---.            .---ex2    |
+        |    /     |            |     \    |
+        '---l1     X            |     l2---'
+                   |            |
+                   |            |
+                   '--->join<---'
+
+   This function transforms the program such that LOOP1 is conditionally
+   falling through to LOOP2, or skipping it.  This is done by splitting
+   the ex1->join edge at X in the diagram above, and inserting a condition
+   whose one arm goes to pre2, resulting in this situation:
+   
+               .------if (cond)------.
+               v                     v
+             pre1       .---------->pre2
+              |         |            |
+        .--->h1         |           h2<----.
+        |     |         |            |     |
+        |    ex1---.    |       .---ex2    |
+        |    /     v    |       |     \    |
+        '---l1   skip---'       |     l2---'
+                   |            |
+                   |            |
+                   '--->join<---'
+
+   
+   The condition used is the exit condition of LOOP1, which effectively means
+   that when the first loop exits (for whatever reason) but the real original
+   exit expression is still false the second loop will be entered.
+   The function returns the new edge cond->pre2.
+   
+   This doesn't update the SSA form, see connect_loop_phis for that.  */
+
+static edge
+connect_loops (struct loop *loop1, struct loop *loop2)
+{
+  edge exit = single_exit (loop1);
+  basic_block skip_bb = split_edge (exit);
+  gcond *skip_stmt;
+  gimple_stmt_iterator gsi;
+  edge new_e, skip_e;
+
+  gimple *stmt = last_stmt (exit->src);
+  skip_stmt = gimple_build_cond (gimple_cond_code (stmt),
+				 gimple_cond_lhs (stmt),
+				 gimple_cond_rhs (stmt),
+				 NULL_TREE, NULL_TREE);
+  gsi = gsi_last_bb (skip_bb);
+  gsi_insert_after (&gsi, skip_stmt, GSI_NEW_STMT);
+
+  skip_e = EDGE_SUCC (skip_bb, 0);
+  skip_e->flags &= ~EDGE_FALLTHRU;
+  new_e = make_edge (skip_bb, loop_preheader_edge (loop2)->src, 0);
+  if (exit->flags & EDGE_TRUE_VALUE)
+    {
+      skip_e->flags |= EDGE_TRUE_VALUE;
+      new_e->flags |= EDGE_FALSE_VALUE;
+    }
+  else
+    {
+      skip_e->flags |= EDGE_FALSE_VALUE;
+      new_e->flags |= EDGE_TRUE_VALUE;
+    }
+
+  new_e->count = skip_bb->count;
+  new_e->probability = PROB_LIKELY;
+  new_e->count = apply_probability (skip_e->count, PROB_LIKELY);
+  skip_e->count -= new_e->count;
+  skip_e->probability = inverse_probability (PROB_LIKELY);
+
+  return new_e;
+}
+
+/* This returns the new bound for iterations given the original iteration
+   space in NITER, an arbitrary new bound BORDER, assumed to be some
+   comparison value with a different IV, the initial value GUARD_INIT of
+   that other IV, and the comparison code GUARD_CODE that compares
+   that other IV with BORDER.  We return an SSA name, and place any
+   necessary statements for that computation into *STMTS.
+
+   For example for such a loop:
+
+     for (i = beg, j = guard_init; i < end; i++, j++)
+       if (j < border)  // this is supposed to be true/false
+         ...
+
+   we want to return a new bound (on j) that makes the loop iterate
+   as long as the condition j < border stays true.  We also don't want
+   to iterate more often than the original loop, so we have to introduce
+   some cut-off as well (via min/max), effectively resulting in:
+
+     newend = min (end+guard_init-beg, border)
+     for (i = beg; j = guard_init; j < newend; i++, j++)
+       if (j < c)
+         ...
+
+   Depending on the direction of the IVs and if the exit tests
+   are strict or non-strict we need to use MIN or MAX,
+   and add or subtract 1.  This routine computes newend above.  */
+
+static tree
+compute_new_first_bound (gimple_seq *stmts, struct tree_niter_desc *niter,
+			 tree border,
+			 enum tree_code guard_code, tree guard_init)
+{
+  /* The niter structure contains the after-increment IV, we need
+     the loop-enter base, so subtract STEP once.  */
+  tree controlbase = force_gimple_operand (niter->control.base,
+					   stmts, true, NULL_TREE);
+  tree controlstep = niter->control.step;
+  tree enddiff;
+  if (POINTER_TYPE_P (TREE_TYPE (controlbase)))
+    {
+      controlstep = gimple_build (stmts, NEGATE_EXPR,
+				  TREE_TYPE (controlstep), controlstep);
+      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
+			      TREE_TYPE (controlbase),
+			      controlbase, controlstep);
+    }
+  else
+    enddiff = gimple_build (stmts, MINUS_EXPR,
+			    TREE_TYPE (controlbase),
+			    controlbase, controlstep);
+
+  /* Compute beg-guard_init.  */
+  if (POINTER_TYPE_P (TREE_TYPE (enddiff)))
+    {
+      tree tem = gimple_convert (stmts, sizetype, guard_init);
+      tem = gimple_build (stmts, NEGATE_EXPR, sizetype, tem);
+      enddiff = gimple_build (stmts, POINTER_PLUS_EXPR,
+			      TREE_TYPE (enddiff),
+			      enddiff, tem);
+    }
+  else
+    enddiff = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
+			    enddiff, guard_init);
+
+  /* Compute end-(beg-guard_init).  */
+  gimple_seq stmts2;
+  tree newbound = force_gimple_operand (niter->bound, &stmts2,
+					true, NULL_TREE);
+  gimple_seq_add_seq_without_update (stmts, stmts2);
+
+  if (POINTER_TYPE_P (TREE_TYPE (enddiff))
+      || POINTER_TYPE_P (TREE_TYPE (newbound)))
+    {
+      enddiff = gimple_convert (stmts, sizetype, enddiff);
+      enddiff = gimple_build (stmts, NEGATE_EXPR, sizetype, enddiff);
+      newbound = gimple_build (stmts, POINTER_PLUS_EXPR,
+			       TREE_TYPE (newbound),
+			       newbound, enddiff);
+    }
+  else
+    newbound = gimple_build (stmts, MINUS_EXPR, TREE_TYPE (enddiff),
+			     newbound, enddiff);
+
+  /* Depending on the direction of the IVs the new bound for the first
+     loop is the minimum or maximum of old bound and border.
+     Also, if the guard condition isn't strictly less or greater,
+     we need to adjust the bound.  */ 
+  int addbound = 0;
+  enum tree_code minmax;
+  if (niter->cmp == LT_EXPR)
+    {
+      /* GT and LE are the same, inverted.  */
+      if (guard_code == GT_EXPR || guard_code == LE_EXPR)
+	addbound = -1;
+      minmax = MIN_EXPR;
+    }
+  else
+    {
+      gcc_assert (niter->cmp == GT_EXPR);
+      if (guard_code == GE_EXPR || guard_code == LT_EXPR)
+	addbound = 1;
+      minmax = MAX_EXPR;
+    }
+
+  if (addbound)
+    {
+      tree type2 = TREE_TYPE (newbound);
+      if (POINTER_TYPE_P (type2))
+	type2 = sizetype;
+      newbound = gimple_build (stmts,
+			       POINTER_TYPE_P (TREE_TYPE (newbound))
+			       ? POINTER_PLUS_EXPR : PLUS_EXPR,
+			       TREE_TYPE (newbound),
+			       newbound,
+			       build_int_cst (type2, addbound));
+    }
+
+  tree newend = gimple_build (stmts, minmax, TREE_TYPE (border),
+			      border, newbound);
+  return newend;
+}
+
+/* Checks if LOOP contains a conditional block whose condition
+   depends on which side in the iteration space it is, and if so
+   splits the iteration space into two loops.  Returns true if the
+   loop was split.  NITER must contain the iteration descriptor for the
+   single exit of LOOP.  */
+
+static bool
+split_loop (struct loop *loop1, struct tree_niter_desc *niter)
+{
+  basic_block *bbs;
+  unsigned i;
+  bool changed = false;
+  tree guard_iv;
+  tree border;
+  affine_iv iv;
+
+  bbs = get_loop_body (loop1);
+
+  /* Find a splitting opportunity.  */
+  for (i = 0; i < loop1->num_nodes; i++)
+    if ((guard_iv = split_at_bb_p (loop1, bbs[i], &border, &iv)))
+      {
+	/* Handling opposite steps is not implemented yet.  Neither
+	   is handling different step sizes.  */
+	if ((tree_int_cst_sign_bit (iv.step)
+	     != tree_int_cst_sign_bit (niter->control.step))
+	    || !tree_int_cst_equal (iv.step, niter->control.step))
+	  continue;
+
+	/* Find a loop PHI node that defines guard_iv directly,
+	   or create one doing that.  */
+	gphi *phi = find_or_create_guard_phi (loop1, guard_iv, &iv);
+	if (!phi)
+	  continue;
+	gcond *guard_stmt = as_a<gcond *> (last_stmt (bbs[i]));
+	tree guard_init = PHI_ARG_DEF_FROM_EDGE (phi,
+						 loop_preheader_edge (loop1));
+	enum tree_code guard_code = gimple_cond_code (guard_stmt);
+
+	/* Loop splitting is implemented by versioning the loop, placing
+	   the new loop after the old loop, make the first loop iterate
+	   as long as the conditional stays true (or false) and let the
+	   second (new) loop handle the rest of the iterations.
+
+	   First we need to determine if the condition will start being true
+	   or false in the first loop.  */
+	bool initial_true;
+	switch (guard_code)
+	  {
+	    case LT_EXPR:
+	    case LE_EXPR:
+	      initial_true = !tree_int_cst_sign_bit (iv.step);
+	      break;
+	    case GT_EXPR:
+	    case GE_EXPR:
+	      initial_true = tree_int_cst_sign_bit (iv.step);
+	      break;
+	    default:
+	      gcc_unreachable ();
+	  }
+
+	/* Build a condition that will skip the first loop when the
+	   guard condition won't ever be true (or false).  */
+	gimple_seq stmts2;
+	border = force_gimple_operand (border, &stmts2, true, NULL_TREE);
+	if (stmts2)
+	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
+					    stmts2);
+	tree cond = build2 (guard_code, boolean_type_node, guard_init, border);
+	if (!initial_true)
+	  cond = fold_build1 (TRUTH_NOT_EXPR, boolean_type_node, cond); 
+
+	/* Now version the loop, placing loop2 after loop1 connecting
+	   them, and fix up SSA form for that.  */
+	initialize_original_copy_tables ();
+	basic_block cond_bb;
+	struct loop *loop2 = loop_version (loop1, cond, &cond_bb,
+					   REG_BR_PROB_BASE, REG_BR_PROB_BASE,
+					   REG_BR_PROB_BASE, true);
+	gcc_assert (loop2);
+	update_ssa (TODO_update_ssa);
+
+	edge new_e = connect_loops (loop1, loop2);
+	connect_loop_phis (loop1, loop2, new_e);
+
+	/* The iterations of the second loop are now already
+	   exactly those that the first loop didn't do, but the
+	   iteration space of the first loop is still the original one.
+	   Compute the new bound for the guarding IV and patch the
+	   loop exit to use it instead of original IV and bound.  */
+	gimple_seq stmts = NULL;
+	tree newend = compute_new_first_bound (&stmts, niter, border,
+					       guard_code, guard_init);
+	if (stmts)
+	  gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop1),
+					    stmts);
+	tree guard_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop1));
+	patch_loop_exit (loop1, guard_stmt, guard_next, newend, initial_true);
+
+	/* Finally patch out the two copies of the condition to be always
+	   true/false (or opposite).  */
+	gcond *force_true = as_a<gcond *> (last_stmt (bbs[i]));
+	gcond *force_false = as_a<gcond *> (last_stmt (get_bb_copy (bbs[i])));
+	if (!initial_true)
+	  std::swap (force_true, force_false);
+	gimple_cond_make_true (force_true);
+	gimple_cond_make_false (force_false);
+	update_stmt (force_true);
+	update_stmt (force_false);
+
+	free_original_copy_tables ();
+
+	/* We destroyed LCSSA form above.  Eventually we might be able
+	   to fix it on the fly, for now simply punt and use the helper.  */
+	rewrite_into_loop_closed_ssa_1 (NULL, 0, SSA_OP_USE, loop1);
+
+	changed = true;
+	if (dump_file && (dump_flags & TDF_DETAILS))
+	  fprintf (dump_file, ";; Loop split.\n");
+
+	/* Only deal with the first opportunity.  */
+	break;
+      }
+
+  free (bbs);
+  return changed;
+}
+
+/* Main entry point.  Perform loop splitting on all suitable loops.  */
+
+static unsigned int
+tree_ssa_split_loops (void)
+{
+  struct loop *loop;
+  bool changed = false;
+
+  gcc_assert (scev_initialized_p ());
+  FOR_EACH_LOOP (loop, 0)
+    loop->aux = NULL;
+
+  /* Go through all loops starting from innermost.  */
+  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
+    {
+      struct tree_niter_desc niter;
+      if (loop->aux)
+	{
+	  /* If any of our inner loops was split, don't split us,
+	     and mark our containing loop as having had splits as well.  */
+	  loop_outer (loop)->aux = loop;
+	  continue;
+	}
+
+      if (single_exit (loop)
+	  /* ??? We could handle non-empty latches when we split
+	     the latch edge (not the exit edge), and put the new
+	     exit condition in the new block.  OTOH this executes some
+	     code unconditionally that might have been skipped by the
+	     original exit before.  */
+	  && empty_block_p (loop->latch)
+	  && !optimize_loop_for_size_p (loop)
+	  && number_of_iterations_exit (loop, single_exit (loop), &niter,
+					false, true)
+	  && niter.cmp != ERROR_MARK
+	  /* We can't yet handle loops controlled by a != predicate.  */
+	  && niter.cmp != NE_EXPR)
+	{
+	  if (split_loop (loop, &niter))
+	    {
+	      /* Mark our containing loop as having had some split inner
+	         loops.  */
+	      loop_outer (loop)->aux = loop;
+	      changed = true;
+	    }
+	}
+    }
+
+  FOR_EACH_LOOP (loop, 0)
+    loop->aux = NULL;
+
+  if (changed)
+    return TODO_cleanup_cfg;
+  return 0;
+}
+
+/* Loop splitting pass.  */
+
+namespace {
+
+const pass_data pass_data_loop_split =
+{
+  GIMPLE_PASS, /* type */
+  "lsplit", /* name */
+  OPTGROUP_LOOP, /* optinfo_flags */
+  TV_LOOP_SPLIT, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  0, /* todo_flags_finish */
+};
+
+class pass_loop_split : public gimple_opt_pass
+{
+public:
+  pass_loop_split (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_loop_split, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *) { return flag_split_loops != 0; }
+  virtual unsigned int execute (function *);
+
+}; // class pass_loop_split
+
+unsigned int
+pass_loop_split::execute (function *fun)
+{
+  if (number_of_loops (fun) <= 1)
+    return 0;
+
+  return tree_ssa_split_loops ();
+}
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_loop_split (gcc::context *ctxt)
+{
+  return new pass_loop_split (ctxt);
+}
Index: doc/invoke.texi
===================================================================
--- doc/invoke.texi	(revision 231115)
+++ doc/invoke.texi	(working copy)
@@ -446,7 +446,7 @@  Objective-C and Objective-C++ Dialects}.
 -fselective-scheduling -fselective-scheduling2 @gol
 -fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
 -fsemantic-interposition -fshrink-wrap -fsignaling-nans @gol
--fsingle-precision-constant -fsplit-ivs-in-unroller @gol
+-fsingle-precision-constant -fsplit-ivs-in-unroller -fsplit-loops @gol
 -fsplit-paths @gol
 -fsplit-wide-types -fssa-backprop -fssa-phiopt @gol
 -fstack-protector -fstack-protector-all -fstack-protector-strong @gol
@@ -10197,6 +10197,11 @@  Enabled with @option{-fprofile-use}.
 Enables the loop invariant motion pass in the RTL loop optimizer.  Enabled
 at level @option{-O1}
 
+@item -fsplit-loops
+@opindex fsplit-loops
+Split a loop into two if it contains a condition that's always true
+for one side of the iteration space and false for the other.
+
 @item -funswitch-loops
 @opindex funswitch-loops
 Move branches with loop invariant conditions out of the loop, with duplicates
Index: doc/passes.texi
===================================================================
--- doc/passes.texi	(revision 231115)
+++ doc/passes.texi	(working copy)
@@ -484,6 +484,12 @@  out of the loops.  To achieve this, a du
 each possible outcome of conditional jump(s).  The pass is implemented in
 @file{tree-ssa-loop-unswitch.c}.
 
+Loop splitting.  If a loop contains a conditional statement that is
+always true for one part of the iteration space and false for the other,
+this pass splits the loop into two, one dealing with one side, the other
+only with the other, thereby removing one inner-loop conditional.  The
+pass is implemented in @file{tree-ssa-loop-split.c}.
+
 The optimizations also use various utility functions contained in
 @file{tree-ssa-loop-manip.c}, @file{cfgloop.c}, @file{cfgloopanal.c} and
 @file{cfgloopmanip.c}.
Index: testsuite/gcc.dg/loop-split.c
===================================================================
--- testsuite/gcc.dg/loop-split.c	(revision 0)
+++ testsuite/gcc.dg/loop-split.c	(working copy)
@@ -0,0 +1,147 @@ 
+/* { dg-do run } */
+/* { dg-options "-O2 -fsplit-loops -fdump-tree-lsplit-details" } */
+
+#ifdef __cplusplus
+extern "C" int printf (const char *, ...);
+extern "C" void abort (void);
+#else
+extern int printf (const char *, ...);
+extern void abort (void);
+#endif
+
+/* Define TRACE to 1 or 2 to get detailed tracing.
+   Define SINGLE_TEST to 1 or 2 to get a simple routine with
+   just one loop, called only one time or with multiple parameters,
+   to make debugging easier.  */
+#ifndef TRACE
+#define TRACE 0
+#endif
+
+#define loop(beg,step,beg2,cond1,cond2) \
+    do \
+      { \
+	sum = 0; \
+        for (i = (beg), j = (beg2); (cond1); i+=(step),j+=(step)) \
+          { \
+            if (cond2) { \
+	      if (TRACE > 1) printf ("a: %d %d\n", i, j); \
+              sum += a[i]; \
+	    } else { \
+	      if (TRACE > 1) printf ("b: %d %d\n", i, j); \
+              sum += b[i]; \
+	    } \
+          } \
+	if (TRACE > 0) printf ("sum: %d\n", sum); \
+	check = check * 47 + sum; \
+      } while (0)
+
+#ifndef SINGLE_TEST
+unsigned __attribute__((noinline, noclone)) dotest (int beg, int end, int step,
+					       int c, int *a, int *b, int beg2)
+{
+  unsigned check = 0;
+  int sum;
+  int i, j;
+  loop (beg, 1, beg2, i < end, j < c);
+  loop (beg, 1, beg2, i <= end, j < c);
+  loop (beg, 1, beg2, i < end, j <= c);
+  loop (beg, 1, beg2, i <= end, j <= c);
+  loop (beg, 1, beg2, i < end, j > c);
+  loop (beg, 1, beg2, i <= end, j > c);
+  loop (beg, 1, beg2, i < end, j >= c);
+  loop (beg, 1, beg2, i <= end, j >= c);
+  beg2 += end-beg;
+  loop (end, -1, beg2, i >= beg, j >= c);
+  loop (end, -1, beg2, i >= beg, j > c);
+  loop (end, -1, beg2, i > beg, j >= c);
+  loop (end, -1, beg2, i > beg, j > c);
+  loop (end, -1, beg2, i >= beg, j <= c);
+  loop (end, -1, beg2, i >= beg, j < c);
+  loop (end, -1, beg2, i > beg, j <= c);
+  loop (end, -1, beg2, i > beg, j < c);
+  return check;
+}
+
+#else
+
+int __attribute__((noinline, noclone)) f (int beg, int end, int step,
+					  int c, int *a, int *b, int beg2)
+{
+  int sum = 0;
+  int i, j;
+  //for (i = beg, j = beg2; i < end; i += 1, j++ /*step*/)
+  for (i = end, j = beg2 + (end-beg); i > beg; i += -1, j-- /*step*/)
+    {
+      // i - j == X --> i = X + j
+      // --> i < end == X+j < end == j < end - X
+      // --> newend = end - (i_init - j_init)
+      // j < end-X && j < c --> j < min(end-X,c)
+      // j < end-X && j <= c --> j <= min(end-X-1,c) or j < min(end-X,c+1{OF!})
+      //if (j < c)
+      if (j >= c)
+	printf ("a: %d %d\n", i, j);
+      /*else
+	printf ("b: %d %d\n", i, j);*/
+	/*sum += a[i];
+      else
+	sum += b[i];*/
+    }
+  return sum;
+}
+
+int __attribute__((noinline, noclone)) f2 (int *beg, int *end, int step,
+					  int *c, int *a, int *b, int *beg2)
+{
+  int sum = 0;
+  int *i, *j;
+  for (i = beg, j = beg2; i < end; i += 1, j++ /*step*/)
+    {
+      if (j <= c)
+	printf ("%d %d\n", i - beg, j - beg);
+	/*sum += a[i];
+      else
+	sum += b[i];*/
+    }
+  return sum;
+}
+#endif
+
+extern int printf (const char *, ...);
+
+int main ()
+{
+  int a[] = {0,0,0,0,0, 1,2,3,4,5,6,7,8,9,          0,0,0,0,0};
+  int b[] = {0,0,0,0,0, -1,-2,-3,-4,-5,-6,-7,-8,-9, 0,0,0,0,0,};
+  int c;
+  int diff = 0;
+  unsigned check = 0;
+#if defined(SINGLE_TEST) && (SINGLE_TEST == 1)
+  //dotest (0, 9, 1, -1, a+5, b+5, -1);
+  //return 0;
+  f (0, 9, 1, 5, a+5, b+5, -1);
+  return 0;
+#endif
+  for (diff = -5; diff <= 5; diff++)
+    {
+      for (c = -1; c <= 10; c++)
+	{
+#ifdef SINGLE_TEST
+	  int s = f (0, 9, 1, c, a+5, b+5, diff);
+	  //int s = f2 (a+0, a+9, 1, a+c, a+5, b+5, a+diff);
+	  printf ("%d ", s);
+#else
+	  if (TRACE > 0)
+	    printf ("check %d %d\n", c, diff);
+	  check = check * 51 + dotest (0, 9, 1, c, a+5, b+5, diff);
+#endif
+	}
+      //printf ("\n");
+    }
+  //printf ("%u\n", check);
+  if (check != 3213344948)
+    abort ();
+  return 0;
+}
+
+/* All 16 loops in dotest should be split.  */
+/* { dg-final { scan-tree-dump-times "Loop split" 16 "lsplit" } } */