===================================================================
@@ -455,6 +455,7 @@ extern struct gimple_opt_pass pass_tm_memopt;
extern struct gimple_opt_pass pass_tm_edges;
extern struct gimple_opt_pass pass_split_functions;
extern struct gimple_opt_pass pass_feedback_split_functions;
+extern struct gimple_opt_pass pass_strength_reduction;
/* IPA Passes */
extern struct simple_ipa_opt_pass pass_ipa_lower_emutls;
===================================================================
@@ -1,3 +1,26 @@
+2012-01-09 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
+
+ * tree-pass.h: Declare pass_strength_reduction.
+ * tree-ssa-loop-ivopts.c (includes): Remove #undef add_cost.
+ (add_cost): Rename to add_regs_cost.
+ (multiply_regs_cost): New function.
+ (add_const_cost): Likewise.
+ (extend_or_trunc_cost): Likewise.
+ (negate_cost): Likewise.
+ (get_address_cost): Rename add_cost to add_regs_cost.
+ (force_expr_to_var_cost): Likewise.
+ (get_computation_cost_at): Likewise.
+ (determine_iv_cost): Likewise.
+ * timevar.def: Add TV_TREE_SLSR.
+ * tree-ssa-strength-reduction.c: New file.
+ * tree-flow.h (add_regs_cost): Declare.
+ (multiply_regs_cost): Likewise.
+ (add_const_cost): Likewise.
+ (extend_or_trunc_cost): Likewise.
+ (negate_cost): Likewise.
+ * Makefile.in: Add tree-ssa-strength-reduction.o deps.
+ * passes.c: Add strength reduction pass.
+
2012-01-06 Arnaud Charlet <charlet@adacore.com>
* c-decl.c (ext_block): Moved up.
===================================================================
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern void foo (int);
+
+void
+f (int *p, unsigned int n)
+{
+ foo (*(p + n * 4));
+ foo (*(p + 32 + n * 4));
+ if (n > 3)
+ foo (*(p + 16 + n * 4));
+ else
+ foo (*(p + 48 + n * 4));
+}
+
+/* { dg-final { scan-tree-dump-times "\\+ 128" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 64" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 192" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+extern void foo (int);
+
+void
+f (int *p, int n)
+{
+ foo (*(p + n++ * 4));
+ foo (*(p + 32 + n++ * 4));
+ foo (*(p + 16 + n * 4));
+}
+
+/* { dg-final { scan-tree-dump-times "\\+ 144" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 96" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,23 @@
+/* Verify straight-line strength reduction for simple integer addition
+ with stride reversed on 1st and 3rd instances. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = a1 + c;
+ a2 = 4 * s;
+ x2 = c + a2;
+ a3 = 6 * s;
+ x3 = a3 + c;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+foo (int a[], int b[], int i)
+{
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ a[i] = b[i] + 2;
+ i++;
+ return i;
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 4" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 8" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 12" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,24 @@
+/* Verify straight-line strength reduction for simple integer addition
+ with casts thrown in. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+long
+f (int s, long c)
+{
+ int a1, a2, a3;
+ long x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = c + a1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ a3 = 6 * s;
+ x3 = c + a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,37 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-slsr -fdump-tree-optimized" } */
+
+void foo (int);
+
+int
+f (int i)
+{
+ int x, y;
+
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+
+ i = i + 5;
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+
+ i = i - 4;
+ x = i * 4;
+ y = x * 10;
+ foo (y);
+}
+
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 10" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 20;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\- 16;" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\- 160" 1 "slsr" } } */
+/* { dg-final { scan-tree-dump-times "\\* 4" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\* 10" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 200" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\\+ 40" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "slsr" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,21 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with stride in inconsistent positions. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int c, int s)
+{
+ int x1, x2, y1, y2;
+
+ y1 = c + 2;
+ x1 = y1 * s;
+ y2 = y1 + 2;
+ x2 = s * y2;
+ return x1 + x2;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* s" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 2" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,30 @@
+/* Verify that no straight-line strength reduction occurs across sibling
+ blocks. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ if (c > 0)
+ {
+ a1 = 2 * s;
+ x1 = c + a1;
+ }
+ else
+ {
+ a1 = 4 * s;
+ x1 = c + a1;
+ }
+
+ a2 = 6 * s;
+ x2 = c + a2;
+ x = x1 + x2;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 3 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for simple add candidates. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = c + a1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ a3 = 6 * s;
+ x3 = c + a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x + 3;
+ a += x1 * stride;
+ x2 = x1 + 3;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x + 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* x2 and x3 will be strength-reduced based on the same statement
+ but with different variables as the stride. Note that they will
+ be strength-reduced by introducing an initializer 4*s which is
+ cheaper than 5*s; similar for 4*c and 5*c. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a2, a3, x1, x2, x3, x;
+
+ x1 = c + s;
+ a2 = 5 * s;
+ x2 = c + a2;
+ a3 = 5 * c;
+ x3 = s + a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* 4" 2 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 5" 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,25 @@
+/* Verify straight-line strength reduction for simple add candidates,
+ pointer version. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+void
+f (int s, char *c, char *x1, char *x2, char *x3)
+{
+ int a1, a2, a3;
+
+ a1 = 2 * s;
+ x1 = c + a1;
+ *x1 = 1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ *x2 = 2;
+ a3 = 6 * s;
+ x3 = c + a3;
+ *x3 = 3;
+}
+
+/* There will be four ' * ' instances for the parms, one in the code. */
+/* { dg-final { scan-tree-dump-times " \\* " 5 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x + 3;
+ a += x1 * stride;
+ x2 = x1 + 3;
+ a += x2 * stride;
+ x3 = x2 + 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Straight-line strength reduction control flow variation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a1, a2, x, x1, x2, x3, x4;
+
+ a1 = 2 * s;
+
+ if (n > 64)
+ {
+ x1 = c + a1;
+ a2 = 4 * s;
+ x2 = c + a2;
+ x = x1 + x2;
+ }
+ else
+ {
+ x3 = c + a1;
+ a2 = 4 * s;
+ x4 = c + a2;
+ x = x4 / x3;
+ }
+
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for simple integer subtraction. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = c - a1;
+ a2 = 4 * s;
+ x2 = c - a2;
+ a3 = 6 * s;
+ x3 = c - a3;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,29 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+ x1 = x + 3;
+ a += x1 * stride;
+
+ if (n > 64)
+ {
+ x2 = x1 + 3;
+ a += x2 * stride;
+ x3 = x2 + 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,27 @@
+/* Straight-line strength reduction control flow variation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a, x1, x2, x3;
+
+ x1 = x2 = x3 = c;
+
+ if (n > 64)
+ {
+ a = 2 * s;
+ x1 = c + a;
+ a = 4 * s;
+ x2 = c + a;
+ a = 6 * s;
+ x3 = c + a;
+ }
+
+ return x1 + x2 + x3;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for simple pointer subtraction. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int*
+f (int s, int *c)
+{
+ int a1, a2, a3, *x1, *x2, *x3;
+
+ a1 = 2 * s;
+ x1 = c - a1;
+ a2 = 4 * s;
+ x2 = c - a2;
+ a3 = 6 * s;
+ x3 = c - a3;
+ return x1 ? x2 : x3;
+}
+
+/* There are 3 ' * ' instances in the decls, 1 parm, 2 in the code. */
+/* { dg-final { scan-tree-dump-times " \\* " 6 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,31 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow, increment = 1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x + 1;
+ a += x1 * stride;
+ x2 = x1 + 1;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x + 1;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,28 @@
+/* Straight-line strength reduction control flow variation. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a2, a3, a4, x1, x2, x3, x4;
+
+ x1 = c + s;
+ a2 = 3 * s;
+ x2 = c + a2;
+ x3 = x4 = c;
+
+ if (n > 64)
+ {
+ a3 = 5 * s;
+ x3 = c + a3;
+ a4 = 7 * s;
+ x4 = c + a4;
+ }
+
+ return x1 + x2 + x3 + x4;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,23 @@
+/* Verify straight-line strength reduction for simple integer addition
+ with stride reversed. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int s, int c)
+{
+ int a1, a2, a3, x1, x2, x3, x;
+
+ a1 = 2 * s;
+ x1 = a1 + c;
+ a2 = 4 * s;
+ x2 = a2 + c;
+ a3 = 6 * s;
+ x3 = a3 + c;
+ x = x1 + x2 + x3;
+ return x;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,31 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow, increment = -1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x - 1;
+ a += x1 * stride;
+ x2 = x1 - 1;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x - 1;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,31 @@
+/* Straight-line strength reduction control flow variation with incr = 1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a2, a3, a4, x1, x2, x3, x4;
+
+ x1 = c + s;
+ x2 = x3 = x4 = c;
+
+ if (n > 64)
+ {
+ a2 = 2 * s;
+ x2 = c + a2;
+ a3 = 3 * s;
+ x3 = c + a3;
+ }
+ else
+ {
+ a4 = 2 * s;
+ x4 = c + a4;
+ }
+
+ return x1 + x2 + x3 + x4;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 0 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with variable stride and control flow, increment = -3. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int x, int stride)
+{
+ int a, x1, x2, x3;
+
+ a = x * stride;
+
+ if (n > 64)
+ {
+ x1 = x - 3;
+ a += x1 * stride;
+ x2 = x1 - 3;
+ a += x2 * stride;
+ }
+ else
+ {
+ x3 = x - 3;
+ a += x3 * stride;
+ }
+
+ return a;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* stride" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* -3" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,32 @@
+/* Straight-line strength reduction control flow variation with incr = -1. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int n, int c, int s)
+{
+ int a1, a2, a3, a4, x1, x2, x3, x4;
+
+ a1 = 4 * s;
+ x1 = c + a1;
+ x2 = x3 = x4 = c;
+
+ if (n > 64)
+ {
+ a2 = 3 * s;
+ x2 = c + a2;
+ a3 = 2 * s;
+ x3 = c + a3;
+ }
+ else
+ {
+ a4 = 3 * s;
+ x4 = c + a4;
+ }
+
+ return x1 + x2 + x3 + x4;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* " 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
===================================================================
@@ -0,0 +1,22 @@
+/* Verify straight-line strength reduction for multiply candidates
+ with stride in RHS1 position. */
+
+/* { dg-do compile } */
+/* { dg-options "-O3 -fdump-tree-optimized" } */
+
+int
+f (int c, int s)
+{
+ int x1, x2, y1, y2;
+
+ y1 = c + 2;
+ x1 = s * y1;
+ y2 = y1 + 2;
+ x2 = s * y2;
+ return x1 + x2;
+}
+
+/* { dg-final { scan-tree-dump-times " \\* y" 1 "optimized" } } */
+/* { dg-final { scan-tree-dump-times " \\* 2" 1 "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */
+
===================================================================
@@ -92,10 +92,9 @@ along with GCC; see the file COPYING3. If not see
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
-/* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses.
+/* FIXME: zero_cost defined in expmed.h conflicts with local uses.
*/
#include "expmed.h"
-#undef add_cost
#undef zero_cost
/* FIXME: Expressions are expanded to RTL in this pass to determine the
@@ -3051,8 +3050,8 @@ adjust_setup_cost (struct ivopts_data *data, unsig
/* Returns cost of addition in MODE. */
-static unsigned
-add_cost (enum machine_mode mode, bool speed)
+unsigned
+add_regs_cost (enum machine_mode mode, bool speed)
{
static unsigned costs[NUM_MACHINE_MODES];
rtx seq;
@@ -3081,6 +3080,148 @@ adjust_setup_cost (struct ivopts_data *data, unsig
return cost;
}
+/* Returns cost of multiplication in MODE. */
+
+unsigned
+multiply_regs_cost (enum machine_mode mode, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+
+ if (costs[mode])
+ return costs[mode];
+
+ start_sequence ();
+ force_operand (gen_rtx_fmt_ee (MULT, mode,
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
+ NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ costs[mode] = cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Multiplication in %s costs %d\n",
+ GET_MODE_NAME (mode), cost);
+ return cost;
+}
+
+/* Returns cost of addition with a constant in MODE. */
+
+unsigned
+add_const_cost (enum machine_mode mode, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+
+ if (costs[mode])
+ return costs[mode];
+
+ /* Arbitrarily generate insns for x + 2, as the exact constant
+ shouldn't matter. */
+ start_sequence ();
+ force_operand (gen_rtx_fmt_ee (PLUS, mode,
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
+ gen_int_mode (2, mode)),
+ NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ costs[mode] = cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Addition to constant in %s costs %d\n",
+ GET_MODE_NAME (mode), cost);
+ return cost;
+}
+
+/* Returns cost of extend or truncate in MODE. */
+
+unsigned
+extend_or_trunc_cost (tree type_to, tree type_from, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+ enum machine_mode mode_to = TYPE_MODE (type_to);
+ enum machine_mode mode_from = TYPE_MODE (type_from);
+ tree size_to = TYPE_SIZE (type_to);
+ tree size_from = TYPE_SIZE (type_from);
+ enum rtx_code code;
+
+ gcc_assert (TREE_CODE (size_to) == INTEGER_CST
+ && TREE_CODE (size_from) == INTEGER_CST);
+
+ if (costs[mode_to][mode_from])
+ return costs[mode_to][mode_from];
+
+ if (tree_int_cst_lt (size_to, size_from))
+ code = TRUNCATE;
+ else if (TYPE_UNSIGNED (type_to))
+ code = ZERO_EXTEND;
+ else
+ code = SIGN_EXTEND;
+
+  start_sequence ();
+  force_operand (gen_rtx_fmt_e (code, mode_to,
+				gen_raw_REG (mode_from,
+					     LAST_VIRTUAL_REGISTER + 1)),
+		 NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+    fprintf (dump_file, "Conversion from %s to %s costs %d\n",
+	     GET_MODE_NAME (mode_from), GET_MODE_NAME (mode_to), cost);
+
+ costs[mode_to][mode_from] = cost;
+ return cost;
+}
+
+/* Returns cost of negation in MODE. */
+
+unsigned
+negate_cost (enum machine_mode mode, bool speed)
+{
+ static unsigned costs[NUM_MACHINE_MODES];
+ rtx seq;
+ unsigned cost;
+
+ if (costs[mode])
+ return costs[mode];
+
+ start_sequence ();
+ force_operand (gen_rtx_fmt_e (NEG, mode,
+ gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1)),
+ NULL_RTX);
+ seq = get_insns ();
+ end_sequence ();
+
+ cost = seq_cost (seq, speed);
+ if (!cost)
+ cost = 1;
+
+ costs[mode] = cost;
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Negation in %s costs %d\n",
+ GET_MODE_NAME (mode), cost);
+ return cost;
+}
+
/* Entry in a hashtable of already known costs for multiplication. */
struct mbc_entry
{
@@ -3408,7 +3549,7 @@ get_address_cost (bool symbol_present, bool var_pr
If VAR_PRESENT is true, try whether the mode with
SYMBOL_PRESENT = false is cheaper even with cost of addition, and
if this is the case, use it. */
- add_c = add_cost (address_mode, speed);
+ add_c = add_regs_cost (address_mode, speed);
for (i = 0; i < 8; i++)
{
var_p = i & 1;
@@ -3492,7 +3633,7 @@ get_address_cost (bool symbol_present, bool var_pr
cost += multiply_by_cost (ratio, address_mode, speed);
if (s_offset && !offset_p && !symbol_present)
- cost += add_cost (address_mode, speed);
+ cost += add_regs_cost (address_mode, speed);
if (may_autoinc)
*may_autoinc = autoinc;
@@ -3659,7 +3800,7 @@ force_expr_to_var_cost (tree expr, bool speed)
case PLUS_EXPR:
case MINUS_EXPR:
case NEGATE_EXPR:
- cost = new_cost (add_cost (mode, speed), 0);
+ cost = new_cost (add_regs_cost (mode, speed), 0);
if (TREE_CODE (expr) != NEGATE_EXPR)
{
tree mult = NULL_TREE;
@@ -4154,7 +4295,7 @@ get_computation_cost_at (struct ivopts_data *data,
&symbol_present, &var_present,
&offset, depends_on));
cost.cost /= avg_loop_niter (data->current_loop);
- cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
+ cost.cost += add_regs_cost (TYPE_MODE (ctype), data->speed);
}
if (inv_expr_id)
@@ -4195,14 +4336,14 @@ get_computation_cost_at (struct ivopts_data *data,
are added once to the variable, if present. */
if (var_present && (symbol_present || offset))
cost.cost += adjust_setup_cost (data,
- add_cost (TYPE_MODE (ctype), speed));
+ add_regs_cost (TYPE_MODE (ctype), speed));
/* Having offset does not affect runtime cost in case it is added to
symbol, but it increases complexity. */
if (offset)
cost.complexity++;
- cost.cost += add_cost (TYPE_MODE (ctype), speed);
+ cost.cost += add_regs_cost (TYPE_MODE (ctype), speed);
aratio = ratio > 0 ? ratio : -ratio;
if (aratio != 1)
@@ -5052,7 +5193,7 @@ determine_iv_cost (struct ivopts_data *data, struc
or a const set. */
if (cost_base.cost == 0)
cost_base.cost = COSTS_N_INSNS (1);
- cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
+ cost_step = add_regs_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
cost = cost_step + adjust_setup_cost (data, cost_base.cost);
===================================================================
@@ -253,6 +253,7 @@ DEFTIMEVAR (TV_TREE_IFCOMBINE , "tree if-co
DEFTIMEVAR (TV_TREE_UNINIT , "uninit var analysis")
DEFTIMEVAR (TV_PLUGIN_INIT , "plugin initialization")
DEFTIMEVAR (TV_PLUGIN_RUN , "plugin execution")
+DEFTIMEVAR (TV_TREE_SLSR , "straight-line strength reduction")
/* Everything else in rest_of_compilation not included above. */
DEFTIMEVAR (TV_EARLY_LOCAL , "early local passes")
===================================================================
@@ -0,0 +1,2355 @@
+/* Straight-line strength reduction.
+ Copyright (C) 2012 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* There are many algorithms for performing strength reduction on
+ loops. This is not one of them. IVOPTS handles strength reduction
+ of induction variables just fine. This pass is intended to pick
+ up the crumbs it leaves behind, by considering opportunities for
+ strength reduction along dominator paths.
+
+ Strength reduction will be implemented in four stages, gradually
+ adding more complex candidates:
+
+ 1) Explicit multiplies, known constant multipliers, no
+ conditional increments. (complete)
+ 2) Explicit multiplies, unknown constant multipliers,
+ no conditional increments. (complete)
+ 3) Explicit multiplies, conditional increments.
+ 4) Implicit multiplies in addressing expressions.
+
+ It would also be possible to apply strength reduction to divisions
+ and modulos, but such opportunities are relatively uncommon.
+
+ Strength reduction is also currently restricted to integer operations.
+ If desired, it could be extended to floating-point operations under
+ control of something like -funsafe-math-optimizations. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tree.h"
+#include "gimple.h"
+#include "basic-block.h"
+#include "tree-pass.h"
+#include "timevar.h"
+#include "cfgloop.h"
+#include "tree-pretty-print.h"
+#include "gimple-pretty-print.h"
+#include "alloc-pool.h"
+#include "tree-flow.h"
+#include "domwalk.h"
+
+/* Information about a strength reduction candidate. Each statement
+ in the candidate represents an expression of one of the following
+ forms:
+
+ (CAND_MULT) S1: X = (B + i) * S
+ (CAND_ADD) S1: X = B + (i * S)
+
+ Here X and B are SSA names, i is an integer constant, and S is
+ either an SSA name or a constant. We call B the "base," i the
+ "index", and S the "stride."
+
+ Any statement S0 that dominates S1 and is of the form:
+
+ (CAND_MULT) S0: Y = (B + i') * S
+ (CAND_ADD) S0: Y = B + (i' * S)
+
+ is called a "basis" for S1. In both cases, S1 may be replaced by
+
+ S1': X = Y + (i - i') * S,
+
+ where (i - i') * S is folded to the extent possible.
+
+ All gimple statements are visited in dominator order, and each
+ statement that may contribute to one of the forms of S1 above is
+ given at least one entry in the candidate table. Such statements
+ include addition, pointer addition, subtraction, multiplication,
+ negation, copies, and nontrivial type casts. If a statement may
+ represent more than one expression of the forms of S1 above,
+ multiple "interpretations" are stored in the table and chained
+ together. Examples:
+
+ * An add of two SSA names may treat either operand as the base.
+ * A multiply of two SSA names, likewise.
+ * A copy or cast may be thought of as either a CAND_MULT with
+ i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
+
+ Candidate records are allocated from an allocation pool. They are
+ addressed both from a hash table keyed on S1, and from a vector of
+ candidate pointers arranged in predominator order.
+
+ Opportunity note
+ ----------------
+ Currently we don't recognize:
+
+ S0: Y = (S * i') - B
+ S1: X = (S * i) - B
+
+ as a strength reduction opportunity, even though this S1 would
+ also be replaceable by the S1' above. This can be added if it
+ comes up in practice. */
+
+/* Index into the candidate vector, offset by 1. VECs are zero-based,
+ while cand_idx's are one-based, with zero indicating null. */
+
+typedef unsigned cand_idx;
+
+/* The kind of candidate: whether the defining statement is viewed as
+ a multiply, X = (B + i) * S (CAND_MULT), or as an add,
+ X = B + (i * S) (CAND_ADD). */
+
+enum cand_kind
+{
+ CAND_MULT,
+ CAND_ADD
+};
+
+/* One interpretation of a candidate statement, in one of the forms
+ described in the module commentary above. */
+struct slsr_cand_d
+{
+ /* The candidate statement S1. */
+ gimple cand_stmt;
+
+ /* The base SSA name B. */
+ tree base_name;
+
+ /* The stride S. */
+ tree stride;
+
+ /* The index constant i. */
+ double_int index;
+
+ /* The type of the candidate. This is normally the type of base_name,
+ but casts may have occurred when combining feeding instructions.
+ A candidate can only be a basis for candidates of the same final type. */
+ tree cand_type;
+
+ /* The kind of candidate (CAND_MULT, etc.). */
+ enum cand_kind kind;
+
+ /* Index of this candidate in the candidate vector. */
+ cand_idx cand_num;
+
+ /* Index of the next candidate record for the same statement.
+ A statement may be useful in more than one way (e.g., due to
+ commutativity). So we can have multiple "interpretations"
+ of a statement. */
+ cand_idx next_interp;
+
+ /* Index of the basis statement S0, if any, in the candidate vector. */
+ cand_idx basis;
+
+ /* First candidate for which this candidate is a basis, if one exists. */
+ cand_idx dependent;
+
+ /* Next candidate having the same basis as this one. */
+ cand_idx sibling;
+
+ /* If this is a conditional candidate, the defining PHI statement
+ for the base SSA name B. For future use; always NULL for now. */
+ gimple def_phi;
+
+ /* Access to the statement for subsequent modification. Cached to
+ save compile time. */
+ gimple_stmt_iterator cand_gsi;
+
+ /* Savings that can be expected from eliminating dead code if this
+ candidate is replaced. */
+ int dead_savings;
+};
+
+typedef struct slsr_cand_d slsr_cand, *slsr_cand_t;
+typedef const struct slsr_cand_d *const_slsr_cand_t;
+
+/* Pointers to candidates are chained together as part of a mapping
+ from SSA names to the candidates that use them as a base name. */
+
+struct cand_chain_d
+{
+ /* SSA name that serves as a base name for the chain of candidates. */
+ tree base_name;
+
+ /* Pointer to a candidate that uses BASE_NAME as its base. */
+ slsr_cand_t cand;
+
+ /* Next record in the chain for the same base name, or NULL. */
+ struct cand_chain_d *next;
+
+};
+
+typedef struct cand_chain_d cand_chain, *cand_chain_t;
+typedef const struct cand_chain_d *const_cand_chain_t;
+
+/* Information about a unique "increment" associated with candidates
+ having an SSA name for a stride. An increment is the difference
+ between the index of the candidate and the index of its basis,
+ i.e., (i - i') as discussed in the module commentary. */
+
+struct incr_info_d
+{
+ /* The increment that relates a candidate to its basis. */
+ double_int incr;
+
+ /* How many times the increment occurs in the candidate tree. */
+ unsigned count;
+
+ /* Cost of replacing candidates using this increment. Negative and
+ zero costs indicate replacement should be performed. */
+ int cost;
+
+ /* If this increment is profitable but is not -1, 0, or 1, it requires
+ an initializer T_0 = stride * incr to be found or introduced in the
+ nearest common dominator of all candidates. This field holds T_0
+ for subsequent use. */
+ tree initializer;
+
+ /* If the initializer was found to already exist, this is the block
+ where it was found; otherwise NULL. */
+ basic_block init_bb;
+
+};
+
+typedef struct incr_info_d incr_info, *incr_info_t;
+
+/* Candidates are maintained in a vector. If candidate X dominates
+ candidate Y, then X appears before Y in the vector; but the
+ converse does not necessarily hold. */
+DEF_VEC_P (slsr_cand_t);
+DEF_VEC_ALLOC_P (slsr_cand_t, heap);
+static VEC (slsr_cand_t, heap) *cand_vec;
+
+/* Symbolic cost values used in profitability analysis. COST_INFINITE
+ marks a replacement that must never be made. */
+enum cost_consts
+{
+ COST_NEUTRAL = 0,
+ COST_INFINITE = 1000
+};
+
+/* Hash table embodying a mapping from statements to candidates. */
+static htab_t stmt_cand_map;
+
+/* Allocation pool for candidates. */
+static alloc_pool cand_pool;
+
+/* Hash table embodying a mapping from base names to chains of candidates. */
+static htab_t base_cand_map;
+
+/* Allocation pool for candidate chains. */
+static alloc_pool chain_pool;
+
+/* An array INCR_VEC of incr_infos is used during analysis of related
+ candidates having an SSA name for a stride. INCR_VEC_LEN describes
+ its current length. */
+static incr_info_t incr_vec;
+static unsigned incr_vec_len;
+
+/* Produce a pointer to the IDX'th candidate in the candidate vector.
+ IDX is one-based (zero means "no candidate"), so the zero-based
+ vector is indexed at IDX - 1. */
+
+static slsr_cand_t
+lookup_cand (cand_idx idx)
+{
+ return VEC_index (slsr_cand_t, cand_vec, idx - 1);
+}
+
+/* Callback to produce a hash value for a candidate. Hashes on the
+ pointer identity of the candidate's statement, matching the
+ pointer comparison performed by stmt_cand_eq. */
+
+static hashval_t
+stmt_cand_hash (const void *p)
+{
+ return htab_hash_pointer (((const_slsr_cand_t) p)->cand_stmt);
+}
+
+/* Callback when an element is removed from the hash table.
+ We never remove entries until the entire table is released,
+ and candidates live in CAND_POOL, so nothing to do here. */
+
+static void
+stmt_cand_free (void *p ATTRIBUTE_UNUSED)
+{
+}
+
+/* Callback to return true if two candidates are equal. Two candidate
+ records are considered equal iff they describe the same statement. */
+
+static int
+stmt_cand_eq (const void *p1, const void *p2)
+{
+ const_slsr_cand_t const cand1 = (const_slsr_cand_t) p1;
+ const_slsr_cand_t const cand2 = (const_slsr_cand_t) p2;
+ return (cand1->cand_stmt == cand2->cand_stmt);
+}
+
+/* Callback to produce a hash value for a candidate chain header.
+ The SSA name version is a ready-made hash; non-SSA base names all
+ collide into bucket 0, where base_cand_eq disambiguates them. */
+
+static hashval_t
+base_cand_hash (const void *p)
+{
+ tree ssa_name = ((const_cand_chain_t) p)->base_name;
+
+ if (TREE_CODE (ssa_name) != SSA_NAME)
+ return (hashval_t) 0;
+
+ return (hashval_t) SSA_NAME_VERSION (ssa_name);
+}
+
+/* Callback when an element is removed from the hash table.
+ We never remove entries until the entire table is released,
+ and chain nodes live in CHAIN_POOL, so nothing to do here. */
+
+static void
+base_cand_free (void *p ATTRIBUTE_UNUSED)
+{
+}
+
+/* Callback to return true if two candidate chain headers are equal,
+ i.e., if they are keyed on the same base name. */
+
+static int
+base_cand_eq (const void *p1, const void *p2)
+{
+ const_cand_chain_t const chain1 = (const_cand_chain_t) p1;
+ const_cand_chain_t const chain2 = (const_cand_chain_t) p2;
+ return operand_equal_p (chain1->base_name, chain2->base_name, 0);
+}
+
+/* Use the base name from candidate C to look for possible candidates
+ that can serve as a basis for C. Each potential basis must also
+ appear in a block that dominates the candidate statement and have
+ the same stride and type. If more than one possible basis exists,
+ the one with highest index in the vector is chosen; this will be
+ the most immediately dominating basis. Return the candidate number
+ of the chosen basis, or 0 if none is found; on success, C is also
+ linked onto the basis's list of dependents. */
+
+static int
+find_basis_for_candidate (slsr_cand_t c)
+{
+ cand_chain mapping_key;
+ cand_chain_t chain;
+ slsr_cand_t basis = NULL;
+
+ mapping_key.base_name = c->base_name;
+ chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key);
+
+ for (; chain; chain = chain->next)
+ {
+ slsr_cand_t one_basis = chain->cand;
+
+ if (one_basis->kind != c->kind
+ || !operand_equal_p (one_basis->stride, c->stride, 0)
+ || !types_compatible_p (one_basis->cand_type, c->cand_type)
+ || !dominated_by_p (CDI_DOMINATORS,
+ gimple_bb (c->cand_stmt),
+ gimple_bb (one_basis->cand_stmt)))
+ continue;
+
+ /* Prefer the candidate added latest; it is the nearest dominator. */
+ if (!basis || basis->cand_num < one_basis->cand_num)
+ basis = one_basis;
+ }
+
+ if (basis)
+ {
+ /* Link C at the head of the basis's dependent list. */
+ c->sibling = basis->dependent;
+ basis->dependent = c->cand_num;
+ return basis->cand_num;
+ }
+
+ return 0;
+}
+
+/* Record a mapping from the base name of C to C itself, indicating that
+ C may potentially serve as a basis using that base name. */
+
+static void
+record_potential_basis (slsr_cand_t c)
+{
+ void **slot;
+ cand_chain_t node;
+
+ node = (cand_chain_t) pool_alloc (chain_pool);
+ node->base_name = c->base_name;
+ node->cand = c;
+ node->next = NULL;
+ slot = htab_find_slot (base_cand_map, node, INSERT);
+
+ if (*slot)
+ {
+ /* Insert NODE immediately after the existing head so the head
+ pointer stored in the hash table never needs to change. */
+ cand_chain_t head = (cand_chain_t) (*slot);
+ node->next = head->next;
+ head->next = node;
+ }
+ else
+ *slot = node;
+}
+
+/* Allocate storage for a new candidate and initialize its fields
+ from KIND, GS, BASE, INDEX, STRIDE, CTYPE, the estimated dead-code
+ SAVINGS, and the statement iterator GSI. Attempt to find a basis
+ for the candidate, and register it as a potential basis for later
+ candidates. Return the new candidate. */
+
+static slsr_cand_t
+alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base,
+ double_int index, tree stride, tree ctype,
+ unsigned savings, gimple_stmt_iterator gsi)
+{
+ slsr_cand_t c = (slsr_cand_t) pool_alloc (cand_pool);
+ c->cand_stmt = gs;
+ c->base_name = base;
+ c->stride = stride;
+ c->index = index;
+ c->cand_type = ctype;
+ c->kind = kind;
+ /* Candidate numbers are one-based; see lookup_cand. */
+ c->cand_num = VEC_length (slsr_cand_t, cand_vec) + 1;
+ c->next_interp = 0;
+ c->dependent = 0;
+ c->sibling = 0;
+ c->def_phi = NULL;
+ c->cand_gsi = gsi;
+ c->dead_savings = savings;
+
+ VEC_safe_push (slsr_cand_t, heap, cand_vec, c);
+ c->basis = find_basis_for_candidate (c);
+ record_potential_basis (c);
+
+ return c;
+}
+
+/* Determine the target cost of statement GS when compiling according
+ to SPEED (true for speed, false for size). GS must be an assignment
+ whose RHS code is one of the operations recognized as candidates;
+ anything else trips the gcc_unreachable below. */
+
+static int
+stmt_cost (gimple gs, bool speed)
+{
+ tree lhs, rhs1, rhs2;
+ enum machine_mode lhs_mode;
+
+ gcc_assert (is_gimple_assign (gs));
+ lhs = gimple_assign_lhs (gs);
+ rhs1 = gimple_assign_rhs1 (gs);
+ lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
+
+ switch (gimple_assign_rhs_code (gs))
+ {
+ case MULT_EXPR:
+ rhs2 = gimple_assign_rhs2 (gs);
+
+ /* Multiply by a constant may be cheaper than a general multiply
+ (shift-add sequences), so cost those separately. */
+ if (TREE_CODE (rhs2) == INTEGER_CST)
+ return multiply_by_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed);
+
+ if (TREE_CODE (rhs1) == INTEGER_CST)
+ return multiply_by_cost (TREE_INT_CST_LOW (rhs1), lhs_mode, speed);
+
+ return multiply_regs_cost (TYPE_MODE (TREE_TYPE (lhs)), speed);
+
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ case MINUS_EXPR:
+ rhs2 = gimple_assign_rhs2 (gs);
+
+ if (TREE_CODE (rhs2) == INTEGER_CST)
+ return add_const_cost (TYPE_MODE (TREE_TYPE (rhs1)), speed);
+
+ if (TREE_CODE (rhs1) == INTEGER_CST)
+ return add_const_cost (TYPE_MODE (TREE_TYPE (rhs2)), speed);
+
+ return add_regs_cost (lhs_mode, speed);
+
+ case NEGATE_EXPR:
+ return negate_cost (lhs_mode, speed);
+
+ case NOP_EXPR:
+ return extend_or_trunc_cost (TREE_TYPE (lhs), TREE_TYPE (rhs1), speed);
+
+ case MODIFY_EXPR:
+ /* Be suspicious of assigning costs to copies that may well go away. */
+ return 0;
+
+ default:
+ ;
+ }
+
+ gcc_unreachable ();
+ return 0;
+}
+
+/* Look up the defining statement for BASE_IN and return a pointer
+ to its candidate in the candidate table, if any; otherwise NULL.
+ When a candidate is found, it is the first of possibly several
+ interpretations chained through next_interp. */
+
+static slsr_cand_t
+base_cand_from_table (tree base_in)
+{
+ slsr_cand mapping_key;
+
+ gimple def = SSA_NAME_DEF_STMT (base_in);
+ if (!def)
+ return (slsr_cand_t) NULL;
+
+ mapping_key.cand_stmt = def;
+ return (slsr_cand_t) htab_find (stmt_cand_map, &mapping_key);
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ multiplies two SSA names BASE_IN and STRIDE_IN. Propagate any
+ known information about the two SSA names into the new candidate.
+ SPEED selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in,
+ gimple_stmt_iterator gsi, bool speed)
+{
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
+ double_int index;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+
+ /* Look at all interpretations of the base candidate, if necessary,
+ to find information to propagate into this candidate. The loop
+ stops at the first interpretation that provides a base. */
+ while (base_cand && !base)
+ {
+
+ if (base_cand->kind == CAND_MULT
+ && operand_equal_p (base_cand->stride, integer_one_node, 0))
+ {
+ /* Y = (B + i') * 1
+ X = Y * Z
+ ================
+ X = (B + i') * Z */
+ base = base_cand->base_name;
+ index = base_cand->index;
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ /* If the feeding statement will become dead after replacement,
+ credit its cost as additional savings. */
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (base_cand->kind == CAND_ADD
+ && TREE_CODE (base_cand->stride) == INTEGER_CST)
+ {
+ /* Y = B + (i' * S), S constant
+ X = Y * Z
+ ============================
+ X = B + ((i' * S) * Z) */
+ base = base_cand->base_name;
+ index = double_int_mul (base_cand->index,
+ tree_to_double_int (base_cand->stride));
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = (Y + 0) * Z. */
+ base = base_in;
+ index = double_int_zero;
+ stride = stride_in;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ multiplies SSA name BASE_IN by constant STRIDE_IN. Propagate any
+ known information about BASE_IN into the new candidate. SPEED
+ selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_mul_imm_cand (gimple gs, tree base_in, tree stride_in,
+ gimple_stmt_iterator gsi, bool speed)
+{
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
+ double_int index, temp;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+
+ /* Look at all interpretations of the base candidate, if necessary,
+ to find information to propagate into this candidate. The loop
+ stops at the first interpretation that provides a base. */
+ while (base_cand && !base)
+ {
+ if (base_cand->kind == CAND_MULT
+ && TREE_CODE (base_cand->stride) == INTEGER_CST)
+ {
+ /* Y = (B + i') * S, S constant
+ X = Y * c
+ ============================
+ X = (B + i') * (S * c) */
+ base = base_cand->base_name;
+ index = base_cand->index;
+ temp = double_int_mul (tree_to_double_int (base_cand->stride),
+ tree_to_double_int (stride_in));
+ stride = double_int_to_tree (TREE_TYPE (stride_in), temp);
+ ctype = base_cand->cand_type;
+ /* If the feeding statement will become dead after replacement,
+ credit its cost as additional savings. */
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (base_cand->kind == CAND_ADD
+ && operand_equal_p (base_cand->stride, integer_one_node, 0))
+ {
+ /* Y = B + (i' * 1)
+ X = Y * c
+ ===========================
+ X = (B + i') * c */
+ base = base_cand->base_name;
+ index = base_cand->index;
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (base_cand->kind == CAND_ADD
+ && double_int_one_p (base_cand->index)
+ && TREE_CODE (base_cand->stride) == INTEGER_CST)
+ {
+ /* Y = B + (1 * S), S constant
+ X = Y * c
+ ===========================
+ X = (B + S) * c */
+ base = base_cand->base_name;
+ index = tree_to_double_int (base_cand->stride);
+ stride = stride_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = (Y + 0) * c. */
+ base = base_in;
+ index = double_int_zero;
+ stride = stride_in;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Given GS which is a multiply of scalar integers, make an appropriate
+ entry in the candidate table. If this is a multiply of two SSA names,
+ create two CAND_MULT interpretations and attempt to find a basis for
+ each of them. Otherwise, create a single CAND_MULT and attempt to
+ find a basis. GSI locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_mul (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ tree rhs2 = gimple_assign_rhs2 (gs);
+ slsr_cand_t c, c2;
+ void **slot;
+
+ if (TREE_CODE (rhs2) != INTEGER_CST && TREE_CODE (rhs2) != SSA_NAME)
+ return;
+
+ /* If this is a multiply of an SSA name with itself, it is highly
+ unlikely that we will get a strength reduction opportunity, so
+ don't record it as a candidate. This simplifies the logic for
+ finding a basis, so if this is removed that must be considered. */
+ if (TREE_CODE (rhs2) == SSA_NAME && operand_equal_p (rhs1, rhs2, 0))
+ return;
+
+ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME)
+ {
+ /* Record an interpretation of this statement in the candidate table
+ assuming RHS1 is the base name and RHS2 is the stride. */
+ c = create_mul_ssa_cand (gs, rhs1, rhs2, gsi, speed);
+
+ /* Add the first interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+
+ /* Record another interpretation of this statement assuming RHS1
+ is the stride and RHS2 is the base name. */
+ c2 = create_mul_ssa_cand (gs, rhs2, rhs1, gsi, speed);
+ c->next_interp = c2->cand_num;
+ }
+ else
+ {
+ /* Canonicalize the multiply-immediate case so the SSA name is
+ always RHS1 (multiplication is commutative). */
+ if (TREE_CODE (rhs1) == INTEGER_CST)
+ {
+ tree swapper = rhs1;
+ rhs1 = rhs2;
+ rhs2 = swapper;
+ }
+
+ if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST)
+ return;
+
+ /* Record an interpretation for the multiply-immediate. */
+ c = create_mul_imm_cand (gs, rhs1, rhs2, gsi, speed);
+
+ /* Add the interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+ }
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ adds two SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false,
+ and subtracts ADDEND_IN from BASE_IN otherwise. Propagate any
+ known information about the two SSA names into the new candidate.
+ SPEED selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_add_ssa_cand (gimple gs, tree base_in, tree addend_in,
+ bool subtract_p, gimple_stmt_iterator gsi, bool speed)
+{
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL;
+ double_int index;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+ slsr_cand_t addend_cand = base_cand_from_table (addend_in);
+
+ /* The most useful transformation is a multiply-immediate feeding
+ an add or subtract. Look for that first. */
+ while (addend_cand && !base)
+ {
+ if (addend_cand->kind == CAND_MULT
+ && double_int_zero_p (addend_cand->index)
+ && TREE_CODE (addend_cand->stride) == INTEGER_CST)
+ {
+ /* Z = (B + 0) * S, S constant
+ X = Y +/- Z
+ ===========================
+ X = Y + ((+/-1 * S) * B) */
+ base = base_in;
+ index = tree_to_double_int (addend_cand->stride);
+ if (subtract_p)
+ index = double_int_neg (index);
+ stride = addend_cand->base_name;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ if (has_single_use (addend_in))
+ savings = (addend_cand->dead_savings
+ + stmt_cost (addend_cand->cand_stmt, speed));
+ }
+
+ if (addend_cand->next_interp)
+ addend_cand = lookup_cand (addend_cand->next_interp);
+ else
+ addend_cand = NULL;
+ }
+
+ while (base_cand && !base)
+ {
+ if (base_cand->kind == CAND_ADD
+ && (double_int_zero_p (base_cand->index)
+ || operand_equal_p (base_cand->stride,
+ integer_zero_node, 0)))
+ {
+ /* Y = B + (i' * S), i' * S = 0
+ X = Y +/- Z
+ ============================
+ X = B + (+/-1 * Z) */
+ base = base_cand->base_name;
+ index = subtract_p ? double_int_minus_one : double_int_one;
+ stride = addend_in;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+ else if (subtract_p)
+ {
+ /* NOTE(review): this inner walk over the interpretations of
+ ADDEND_IN appears to mirror the addend_cand loop above for
+ the subtract case — confirm whether it can yield a different
+ result when the outer loop reaches this branch. */
+ slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
+
+ while (subtrahend_cand && !base)
+ {
+ if (subtrahend_cand->kind == CAND_MULT
+ && double_int_zero_p (subtrahend_cand->index)
+ && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
+ {
+ /* Z = (B + 0) * S, S constant
+ X = Y - Z
+ ===========================
+ Value: X = Y + ((-1 * S) * B) */
+ base = base_in;
+ index = tree_to_double_int (subtrahend_cand->stride);
+ index = double_int_neg (index);
+ stride = subtrahend_cand->base_name;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ if (has_single_use (addend_in))
+ savings = (subtrahend_cand->dead_savings
+ + stmt_cost (subtrahend_cand->cand_stmt, speed));
+ }
+
+ if (subtrahend_cand->next_interp)
+ subtrahend_cand = lookup_cand (subtrahend_cand->next_interp)
+ else
+ subtrahend_cand = NULL;
+ }
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = Y + (1 * Z). */
+ base = base_in;
+ index = subtract_p ? double_int_minus_one : double_int_one;
+ stride = addend_in;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Return TRUE iff PRODUCT is an integral multiple of FACTOR, and return
+ the multiple in *MULTIPLE. Otherwise return FALSE and leave *MULTIPLE
+ unchanged. UNSIGNED_P selects unsigned division semantics. */
+/* ??? - Should this be moved to double-int.c? */
+
+static bool
+double_int_multiple_of (double_int product, double_int factor,
+ bool unsigned_p, double_int *multiple)
+{
+ double_int remainder;
+ /* Truncating division; PRODUCT is a multiple iff the remainder is 0. */
+ double_int quotient = double_int_divmod (product, factor, unsigned_p,
+ TRUNC_DIV_EXPR, &remainder);
+ if (double_int_zero_p (remainder))
+ {
+ *multiple = quotient;
+ return true;
+ }
+
+ return false;
+}
+
+/* Create a candidate entry for a statement GS at GSI, where GS
+ adds SSA name BASE_IN to constant INDEX_IN. Propagate any
+ known information about BASE_IN into the new candidate. SPEED
+ selects speed vs. size costing. Return the new candidate. */
+
+static slsr_cand_t
+create_add_imm_cand (gimple gs, tree base_in, double_int index_in,
+ gimple_stmt_iterator gsi, bool speed)
+{
+ enum cand_kind kind = CAND_ADD;
+ tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
+ double_int index, multiple;
+ unsigned savings = 0;
+ slsr_cand_t c;
+ slsr_cand_t base_cand = base_cand_from_table (base_in);
+
+ /* Stop at the first interpretation whose constant stride evenly
+ divides the added constant. */
+ while (base_cand && !base)
+ {
+ bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride));
+
+ if (TREE_CODE (base_cand->stride) == INTEGER_CST
+ && double_int_multiple_of (index_in,
+ tree_to_double_int (base_cand->stride),
+ unsigned_p,
+ &multiple))
+ {
+ /* Y = (B + i') * S, S constant, c = kS for some integer k
+ X = Y + c
+ ============================
+ X = (B + (i'+ k)) * S
+ OR
+ Y = B + (i' * S), S constant, c = kS for some integer k
+ X = Y + c
+ ============================
+ X = (B + (i'+ k)) * S */
+ kind = base_cand->kind;
+ base = base_cand->base_name;
+ index = double_int_add (base_cand->index, multiple);
+ stride = base_cand->stride;
+ ctype = base_cand->cand_type;
+ if (has_single_use (base_in))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+ }
+
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+
+ if (!base)
+ {
+ /* No interpretations had anything useful to propagate, so
+ produce X = Y + (c * 1). */
+ kind = CAND_ADD;
+ base = base_in;
+ index = index_in;
+ stride = integer_one_node;
+ ctype = TREE_TYPE (SSA_NAME_VAR (base_in));
+ }
+
+ c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
+ ctype, savings, gsi);
+ return c;
+}
+
+/* Given GS which is an add or subtract of scalar integers or pointers,
+ make at least one appropriate entry in the candidate table. GSI
+ locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_add (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ tree rhs2 = gimple_assign_rhs2 (gs);
+ bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
+ slsr_cand_t c, c2;
+ void **slot;
+
+ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME)
+ {
+ /* First record an interpretation assuming RHS1 is the base
+ name and RHS2 is the stride. */
+ c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, gsi, speed);
+
+ /* Add the first interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+
+ /* If the two RHS operands are identical, or this is a subtract,
+ we're done. */
+ if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
+ return;
+
+ /* Otherwise, record another interpretation assuming RHS2 is the
+ base name and RHS1 is the stride. */
+ c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, gsi, speed);
+ c->next_interp = c2->cand_num;
+ }
+ else
+ {
+ double_int index;
+
+ /* Canonicalize the add-immediate so the SSA name is RHS1. Only
+ valid for PLUS, since subtraction is not commutative. */
+ if (!subtract_p && TREE_CODE (rhs1) == INTEGER_CST)
+ {
+ tree swapper = rhs1;
+ rhs1 = rhs2;
+ rhs2 = swapper;
+ }
+
+ if (TREE_CODE (rhs1) != SSA_NAME || TREE_CODE (rhs2) != INTEGER_CST)
+ return;
+
+ /* Record an interpretation for the add-immediate; a subtract of
+ a constant is treated as an add of its negation. */
+ index = tree_to_double_int (rhs2);
+ if (subtract_p)
+ index = double_int_neg (index);
+
+ c = create_add_imm_cand (gs, rhs1, index, gsi, speed);
+
+ /* Add the interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+ }
+}
+
+/* Given GS which is a negate of a scalar integer, make an appropriate
+ entry in the candidate table. A negate is equivalent to a multiply
+ by -1. GSI locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_neg (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ /* Declarations precede all statements, per GNU/C90 convention and
+ consistent with the rest of this file. */
+ tree rhs1 = gimple_assign_rhs1 (gs);
+ slsr_cand_t c;
+ void **slot;
+
+ /* Record a CAND_MULT interpretation for the multiply by -1. */
+ c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, gsi, speed);
+
+ /* Add the interpretation to the statement-candidate mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Return TRUE if GS is a statement that defines an SSA name from
+ a NOP_EXPR and is legal for us to combine an add and multiply
+ across. This is legitimate for casts from a signed type to
+ a signed or unsigned type of the same or larger size. It is not
+ legitimate to convert any unsigned type to a signed type, or
+ to an unsigned type of a different size.
+
+ The reasoning here is that signed integer overflow is undefined,
+ so any program that was expecting overflow that no longer occurs
+ is not correct. Unsigned integers, however, have wrap semantics,
+ and it is reasonable for programs to assume an overflowing add
+ will wrap. */
+
+static bool
+legal_cast_p (gimple gs)
+{
+ tree lhs, rhs, lhs_type, rhs_type;
+ unsigned lhs_size, rhs_size, lhs_uns, rhs_uns;
+
+ if (!is_gimple_assign (gs)
+ || TREE_CODE (gimple_assign_lhs (gs)) != SSA_NAME
+ || gimple_assign_rhs_code (gs) != NOP_EXPR)
+ return false;
+
+ lhs = gimple_assign_lhs (gs);
+ rhs = gimple_assign_rhs1 (gs);
+
+ if (TREE_CODE (rhs) != SSA_NAME)
+ return false;
+
+ lhs_type = TREE_TYPE (SSA_NAME_VAR (lhs));
+ rhs_type = TREE_TYPE (SSA_NAME_VAR (rhs));
+ lhs_size = TYPE_PRECISION (lhs_type);
+ rhs_size = TYPE_PRECISION (rhs_type);
+ lhs_uns = TYPE_UNSIGNED (lhs_type);
+ rhs_uns = TYPE_UNSIGNED (rhs_type);
+
+ /* Narrowing casts discard bits; never legal. */
+ if (lhs_size < rhs_size)
+ return false;
+
+ /* Unsigned -> signed changes wrap semantics; not legal. */
+ if (rhs_uns && !lhs_uns)
+ return false;
+
+ /* Unsigned -> unsigned of a different (wider) size changes the
+ wrap point; not legal. */
+ if (rhs_uns && lhs_uns && rhs_size != lhs_size)
+ return false;
+
+ return true;
+}
+
+/* Given GS which is a cast to a scalar integer type, determine whether
+ the cast is legal for strength reduction. If so, make at least one
+ appropriate entry in the candidate table, propagating each
+ interpretation of the RHS's candidate (if any) with the cast's type.
+ GSI locates GS; SPEED selects speed vs. size costing. */
+
+static void
+slsr_process_cast (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ tree lhs, rhs1, ctype;
+ slsr_cand_t base_cand, c, c2;
+ unsigned savings = 0;
+ void **slot;
+
+ if (!legal_cast_p (gs))
+ return;
+
+ lhs = gimple_assign_lhs (gs);
+ rhs1 = gimple_assign_rhs1 (gs);
+ base_cand = base_cand_from_table (rhs1);
+ ctype = TREE_TYPE (lhs);
+
+ if (base_cand)
+ {
+ while (base_cand)
+ {
+ /* Propagate all data from the base candidate except the type,
+ which comes from the cast, and the base candidate's cast,
+ which is no longer applicable. */
+ if (has_single_use (rhs1))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+
+ c = alloc_cand_and_find_basis (base_cand->kind, gs,
+ base_cand->base_name,
+ base_cand->index, base_cand->stride,
+ ctype, savings, gsi);
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+ }
+ else
+ {
+ /* If nothing is known about the RHS, create fresh CAND_ADD and
+ CAND_MULT interpretations:
+
+ X = Y + (0 * 1)
+ X = (Y + 0) * 1
+
+ The first of these is somewhat arbitrary, but the choice of
+ 1 for the stride simplifies the logic for propagating casts
+ into their uses. */
+ c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
+ integer_one_node, ctype, 0, gsi);
+ c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
+ integer_one_node, ctype, 0, gsi);
+ c->next_interp = c2->cand_num;
+ }
+
+ /* Add the first (or only) interpretation to the statement-candidate
+ mapping. C is the last-allocated candidate in the loop case; its
+ statement is GS either way. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Given GS which is a copy of a scalar integer type, make at least one
+ appropriate entry in the candidate table.
+
+ This interface is included for completeness, but is unnecessary
+ if this pass immediately follows a pass that performs copy
+ propagation, such as DOM. */
+
+static void
+slsr_process_copy (gimple_stmt_iterator gsi, gimple gs, bool speed)
+{
+ slsr_cand_t base_cand, c, c2;
+ void **slot;
+ unsigned savings = 0;
+ tree rhs1 = gimple_assign_rhs1 (gs);
+
+ /* Only SSA-to-SSA copies are interesting. */
+ if (TREE_CODE (rhs1) != SSA_NAME)
+ return;
+
+ base_cand = base_cand_from_table (rhs1);
+
+ if (base_cand)
+ {
+ /* Mirror each interpretation of the copied value onto GS. */
+ while (base_cand)
+ {
+ /* Propagate all data from the base candidate. */
+ if (has_single_use (rhs1))
+ savings = (base_cand->dead_savings
+ + stmt_cost (base_cand->cand_stmt, speed));
+
+ c = alloc_cand_and_find_basis (base_cand->kind, gs,
+ base_cand->base_name,
+ base_cand->index, base_cand->stride,
+ base_cand->cand_type, savings, gsi);
+ if (base_cand->next_interp)
+ base_cand = lookup_cand (base_cand->next_interp);
+ else
+ base_cand = NULL;
+ }
+ }
+ else
+ {
+ /* If nothing is known about the RHS, create fresh CAND_ADD and
+ CAND_MULT interpretations:
+
+ X = Y + (0 * 1)
+ X = (Y + 0) * 1
+
+ The first of these is somewhat arbitrary, but the choice of
+ 1 for the stride simplifies the logic for propagating casts
+ into their uses. */
+ c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero,
+ integer_one_node, TREE_TYPE (rhs1),
+ 0, gsi);
+ c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero,
+ integer_one_node, TREE_TYPE (rhs1),
+ 0, gsi);
+ c->next_interp = c2->cand_num;
+ }
+
+ /* Add the first (or only) interpretation to the statement-candidate
+ mapping. */
+ slot = htab_find_slot (stmt_cand_map, c, INSERT);
+ gcc_assert (!*slot);
+ *slot = c;
+}
+
+/* Find strength-reduction candidates in block BB. */
+
+static void
+find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED,
+ basic_block bb)
+{
+ bool speed = optimize_bb_for_speed_p (bb);
+ gimple_stmt_iterator gsi;
+
+ /* Examine each scalar-integer assignment and dispatch on the RHS
+ operation to build the candidate table. */
+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple gs = gsi_stmt (gsi);
+ if (is_gimple_assign (gs)
+ && SCALAR_INT_MODE_P (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs)))))
+ {
+ switch (gimple_assign_rhs_code (gs))
+ {
+ case MULT_EXPR:
+ slsr_process_mul (gsi, gs, speed);
+ break;
+
+ case PLUS_EXPR:
+ case POINTER_PLUS_EXPR:
+ case MINUS_EXPR:
+ slsr_process_add (gsi, gs, speed);
+ break;
+
+ case NEGATE_EXPR:
+ slsr_process_neg (gsi, gs, speed);
+ break;
+
+ case NOP_EXPR:
+ slsr_process_cast (gsi, gs, speed);
+ break;
+
+ /* NOTE(review): for a plain copy, gimple_assign_rhs_code
+ returns the TREE_CODE of the RHS (e.g. SSA_NAME), so it
+ is unclear that MODIFY_EXPR can ever match here — confirm
+ whether slsr_process_copy is reachable. */
+ case MODIFY_EXPR:
+ slsr_process_copy (gsi, gs, speed);
+ break;
+
+ default:
+ ;
+ }
+ }
+ }
+}
+
+/* Dump a candidate for debug. */
+
+static void
+dump_candidate (slsr_cand_t c)
+{
+ fprintf (dump_file, "%3d [%d] ", c->cand_num,
+ gimple_bb (c->cand_stmt)->index);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ /* Show the interpretation in the canonical form for its kind:
+ CAND_MULT is (B + i) * S, CAND_ADD is B + (i * S). */
+ switch (c->kind)
+ {
+ case CAND_MULT:
+ fputs (" MULT : (", dump_file);
+ print_generic_expr (dump_file, c->base_name, 0);
+ fputs (" + ", dump_file);
+ dump_double_int (dump_file, c->index, false);
+ fputs (") * ", dump_file);
+ print_generic_expr (dump_file, c->stride, 0);
+ fputs (" : ", dump_file);
+ break;
+ case CAND_ADD:
+ fputs (" ADD : ", dump_file);
+ print_generic_expr (dump_file, c->base_name, 0);
+ fputs (" + (", dump_file);
+ dump_double_int (dump_file, c->index, false);
+ fputs (" * ", dump_file);
+ print_generic_expr (dump_file, c->stride, 0);
+ fputs (") : ", dump_file);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ print_generic_expr (dump_file, c->cand_type, 0);
+ fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
+ c->basis, c->dependent, c->sibling);
+ fprintf (dump_file, " next-interp: %d dead-savings: %d\n",
+ c->next_interp, c->dead_savings);
+ if (c->def_phi)
+ {
+ fputs (" phi: ", dump_file);
+ print_gimple_stmt (dump_file, c->def_phi, 0, 0);
+ }
+ fputs ("\n", dump_file);
+}
+
+/* Dump the candidate vector for debug. Callers are expected to have
+ checked dump_file already. */
+
+static void
+dump_cand_vec (void)
+{
+ unsigned i;
+ slsr_cand_t c;
+
+ fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
+
+ FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
+ dump_candidate (c);
+}
+
+/* Callback used to dump the candidate chains hash table. Prints one
+ base name followed by the candidate numbers chained from it.
+ Returns 1 so htab_traverse continues over all slots. */
+
+static int
+base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED)
+{
+ const_cand_chain_t chain = *((const_cand_chain_t *) slot);
+ cand_chain_t p;
+
+ print_generic_expr (dump_file, chain->base_name, 0);
+ fprintf (dump_file, " -> %d", chain->cand->cand_num);
+
+ for (p = chain->next; p; p = p->next)
+ fprintf (dump_file, " -> %d", p->cand->cand_num);
+
+ fputs ("\n", dump_file);
+ return 1;
+}
+
+/* Dump the candidate chains hash table. Callers are expected to have
+ checked dump_file already. */
+
+static void
+dump_cand_chains (void)
+{
+ fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
+ htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL);
+ fputs ("\n", dump_file);
+}
+
+/* Dump the increment vector for debug. Unlike the other dump helpers,
+ this one guards on dump_file itself. */
+
+static void
+dump_incr_vec (void)
+{
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ unsigned i;
+
+ fprintf (dump_file, "\nIncrement vector:\n\n");
+
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ fprintf (dump_file, "%3d increment: ", i);
+ dump_double_int (dump_file, incr_vec[i].incr, false);
+ fprintf (dump_file, "\n count: %d", incr_vec[i].count);
+ fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
+ fputs ("\n initializer: ", dump_file);
+ print_generic_expr (dump_file, incr_vec[i].initializer, 0);
+ fputs ("\n\n", dump_file);
+ }
+ }
+}
+
+/* Recursive helper for unconditional_cands_with_known_stride_p.
+ Returns TRUE iff C, its siblings, and its dependents are all
+ unconditional candidates (i.e. none is defined by a phi). */
+
+static bool
+unconditional_cands (slsr_cand_t c)
+{
+ /* A phi-dependence makes the candidate conditional. */
+ if (c->def_phi)
+ return false;
+
+ if (c->sibling && !unconditional_cands (lookup_cand (c->sibling)))
+ return false;
+
+ if (c->dependent && !unconditional_cands (lookup_cand (c->dependent)))
+ return false;
+
+ return true;
+}
+
+/* Determine whether or not the tree of candidates rooted at
+ ROOT consists entirely of unconditional increments with
+ an INTEGER_CST stride. */
+
+static bool
+unconditional_cands_with_known_stride_p (slsr_cand_t root)
+{
+ /* The stride is identical for all related candidates, so
+ check it once. */
+ if (TREE_CODE (root->stride) != INTEGER_CST)
+ return false;
+
+ /* The root itself is never replaced; only check its dependents. */
+ return unconditional_cands (lookup_cand (root->dependent));
+}
+
+/* Determine whether or not the tree of candidates rooted at
+ ROOT consists entirely of unconditional increments with
+ an SSA_NAME stride. */
+
+static bool
+unconditional_cands_with_unknown_stride_p (slsr_cand_t root)
+{
+ /* The stride is identical for all related candidates, so
+ check it once. */
+ if (TREE_CODE (root->stride) != SSA_NAME)
+ return false;
+
+ /* The root itself is never replaced; only check its dependents. */
+ return unconditional_cands (lookup_cand (root->dependent));
+}
+
+/* Calculate the increment required for candidate C relative to
+ its basis, i.e. C's index minus the basis's index. */
+
+static double_int
+cand_increment (slsr_cand_t c)
+{
+ slsr_cand_t basis;
+
+ /* If the candidate doesn't have a basis, just return its own
+ index. This is useful in record_increments to help us find
+ an existing initializer. */
+ if (!c->basis)
+ return c->index;
+
+ basis = lookup_cand (c->basis);
+ /* A candidate and its basis always share a base name. */
+ gcc_assert (operand_equal_p (c->base_name, basis->base_name, 0));
+ return double_int_sub (c->index, basis->index);
+}
+
+/* Replace candidate C, each sibling of candidate C, and each
+ dependent of candidate C with an add or subtract. Note that we
+ only operate on CAND_MULTs with known strides, so we will never
+ generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is
+ replaced by X = Y + ((i - i') * S), as described in the module
+ commentary. The folded value ((i - i') * S) is referred to here
+ as the "bump." */
+
+static void
+replace_dependents (slsr_cand_t c)
+{
+ slsr_cand_t basis = lookup_cand (c->basis);
+ tree basis_name = gimple_assign_lhs (basis->cand_stmt);
+ tree incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt));
+ double_int stride = tree_to_double_int (c->stride);
+ double_int bump = double_int_mul (cand_increment (c), stride);
+ enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
+
+ /* It is highly unlikely, but possible, that the resulting
+ bump doesn't fit in a HWI. Abandon the replacement
+ in this case. This does not affect siblings or dependents
+ of C. Restriction to signed HWI is conservative for unsigned
+ types but allows for safe negation without twisted logic. */
+ if (double_int_fits_in_shwi_p (bump)
+ /* It is not useful to replace casts, copies, or adds of
+ an SSA name and a constant. */
+ && cand_code != MODIFY_EXPR
+ && cand_code != NOP_EXPR
+ && c->kind != CAND_ADD)
+ {
+ enum tree_code code = PLUS_EXPR;
+ tree bump_tree;
+ gimple stmt_to_print = NULL;
+
+ /* Canonicalize a negative bump into a subtract of its
+ absolute value. */
+ if (double_int_negative_p (bump))
+ {
+ code = MINUS_EXPR;
+ bump = double_int_neg (bump);
+ }
+
+ bump_tree = double_int_to_tree (incr_type, bump);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Replacing: ", dump_file);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ }
+
+ /* A zero bump means C computes the same value as its basis;
+ replace the statement with a plain copy. */
+ if (double_int_zero_p (bump))
+ {
+ tree lhs = gimple_assign_lhs (c->cand_stmt);
+ gimple copy_stmt = gimple_build_assign (lhs, basis_name);
+ gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
+ gsi_replace (&c->cand_gsi, copy_stmt, false);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = copy_stmt;
+ }
+ else
+ {
+ /* Skip the rewrite when the statement already has the exact
+ form we would produce (either operand order). NEGATE_EXPR
+ is excluded because it has no rhs2 to compare. */
+ tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
+ tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
+ if (cand_code != NEGATE_EXPR
+ && ((operand_equal_p (rhs1, basis_name, 0)
+ && operand_equal_p (rhs2, bump_tree, 0))
+ || (operand_equal_p (rhs1, bump_tree, 0)
+ && operand_equal_p (rhs2, basis_name, 0))))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("(duplicate, not actually replacing)", dump_file);
+ stmt_to_print = c->cand_stmt;
+ }
+ }
+ else
+ {
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, code,
+ basis_name, bump_tree);
+ update_stmt (gsi_stmt (c->cand_gsi));
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = gsi_stmt (c->cand_gsi);
+ }
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("With: ", dump_file);
+ print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
+ fputs ("\n", dump_file);
+ }
+ }
+
+ /* Recurse over the whole tree of related candidates. */
+ if (c->sibling)
+ replace_dependents (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ replace_dependents (lookup_cand (c->dependent));
+}
+
+/* Count the number of candidates in the tree rooted at C, including
+ C itself. */
+
+static unsigned
+count_candidates (slsr_cand_t c)
+{
+ unsigned count = 1;
+
+ if (c->sibling)
+ count += count_candidates (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ count += count_candidates (lookup_cand (c->dependent));
+
+ return count;
+}
+
+/* Determine how many times each unique increment occurs in the set
+ of candidates rooted at C's parent, recording the data in the
+ increment vector. For each unique increment I, if an initializer
+ T_0 = stride * I is provided by a candidate that dominates all
+ candidates with the same increment, also record T_0 for subsequent
+ use. The caller sizes incr_vec so that one entry per candidate
+ always suffices. */
+
+static void
+record_increments (slsr_cand_t c)
+{
+ double_int increment = cand_increment (c);
+ bool found = false;
+ unsigned i;
+
+ /* Linear search for an existing entry with this increment. */
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ if (double_int_equal_p (incr_vec[i].incr, increment))
+ {
+ incr_vec[i].count++;
+ found = true;
+
+ /* If we previously recorded an initializer that doesn't
+ dominate this candidate, it's not going to be useful to
+ us after all. */
+ if (incr_vec[i].initializer
+ && !dominated_by_p (CDI_DOMINATORS,
+ gimple_bb (c->cand_stmt),
+ incr_vec[i].init_bb))
+ {
+ incr_vec[i].initializer = NULL_TREE;
+ incr_vec[i].init_bb = NULL;
+ }
+
+ break;
+ }
+ }
+
+ if (!found)
+ {
+ /* The first time we see an increment, create the entry for it.
+ If this candidate doesn't have a basis, set the count to zero.
+ We're only processing it so it can possibly provide an
+ initializer for other candidates. */
+ incr_vec[incr_vec_len].incr = increment;
+ incr_vec[incr_vec_len].count = c->basis ? 1 : 0;
+ incr_vec[incr_vec_len].cost = COST_INFINITE;
+
+ /* Optimistically record the first occurrence of this increment
+ as providing an initializer (if it does); we will revise this
+ opinion later if it doesn't dominate all other occurrences.
+ Exception: increments of -1, 0, 1 never need initializers. */
+ if (c->kind == CAND_ADD
+ && double_int_equal_p (c->index, increment)
+ && (double_int_scmp (increment, double_int_one) > 0
+ || double_int_scmp (increment, double_int_minus_one) < 0))
+ {
+ tree t0;
+ tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
+ tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
+ /* T0 is whichever operand is not the base name, i.e. the
+ candidate's stride*increment value. */
+ if (operand_equal_p (rhs1, c->base_name, 0))
+ t0 = rhs2;
+ else
+ t0 = rhs1;
+ /* NOTE(review): SSA_NAME_DEF_STMT is applied to T0 here,
+ which assumes T0 is an SSA name — confirm a constant
+ operand cannot reach this point. */
+ if (SSA_NAME_DEF_STMT (t0) && gimple_bb (SSA_NAME_DEF_STMT (t0)))
+ {
+ incr_vec[incr_vec_len].initializer = t0;
+ incr_vec[incr_vec_len++].init_bb
+ = gimple_bb (SSA_NAME_DEF_STMT (t0));
+ }
+ else
+ {
+ incr_vec[incr_vec_len].initializer = NULL_TREE;
+ incr_vec[incr_vec_len++].init_bb = NULL;
+ }
+ }
+ else
+ {
+ incr_vec[incr_vec_len].initializer = NULL_TREE;
+ incr_vec[incr_vec_len++].init_bb = NULL;
+ }
+ }
+
+ /* Recurse over the whole tree of related candidates. */
+ if (c->sibling)
+ record_increments (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ record_increments (lookup_cand (c->dependent));
+}
+
+/* Add COST_IN to the lowest cost of any dependent path starting at
+ candidate C or any of its siblings, counting only candidates along
+ such paths with increment INCR. Assume that replacing a candidate
+ reduces cost by REPL_SAVINGS. Also account for savings from any
+ statements that would go dead. A negative result means the
+ replacements along the best path are profitable. */
+
+static int
+lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
+ double_int incr)
+{
+ int local_cost, sib_cost;
+
+ /* Only candidates whose increment matches INCR would be replaced
+ and thus contribute savings. */
+ if (double_int_equal_p (incr, cand_increment (c)))
+ local_cost = cost_in - repl_savings - c->dead_savings;
+ else
+ local_cost = cost_in - c->dead_savings;
+
+ /* Dependents extend the current path; siblings start alternative
+ paths from the same incoming cost. */
+ if (c->dependent)
+ local_cost = lowest_cost_path (local_cost, repl_savings,
+ lookup_cand (c->dependent), incr);
+ if (c->sibling)
+ {
+ sib_cost = lowest_cost_path (cost_in, repl_savings,
+ lookup_cand (c->sibling), incr);
+ local_cost = MIN (local_cost, sib_cost);
+ }
+
+ return local_cost;
+}
+
+/* Compute the total savings that would accrue from all replacements
+ in the candidate tree rooted at C, counting only candidates with
+ increment INCR. Assume that replacing a candidate reduces cost
+ by REPL_SAVINGS. Also account for savings from statements that
+ would go dead. Used for the size (not speed) cost model. */
+
+static int
+total_savings (int repl_savings, slsr_cand_t c, double_int incr)
+{
+ int savings = 0;
+
+ if (double_int_equal_p (incr, cand_increment (c)))
+ savings += repl_savings + c->dead_savings;
+
+ if (c->dependent)
+ savings += total_savings (repl_savings, lookup_cand (c->dependent), incr);
+
+ if (c->sibling)
+ savings += total_savings (repl_savings, lookup_cand (c->sibling), incr);
+
+ return savings;
+}
+
+/* Use target-specific costs to determine and record which increments
+ in the current candidate tree are profitable to replace, assuming
+ MODE and SPEED. FIRST_DEP is the first dependent of the root of
+ the candidate tree.
+
+ One slight limitation here is that we don't account for the possible
+ introduction of casts in some cases. See replace_one_candidate for
+ the cases where these are introduced. This should probably be cleaned
+ up sometime. */
+
+static void
+analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed)
+{
+ unsigned i;
+
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ /* INCR may be truncated if the value doesn't fit a signed HWI,
+ but in that case the first branch below rejects the entry
+ before INCR is used. */
+ HOST_WIDE_INT incr = double_int_to_shwi (incr_vec[i].incr);
+
+ /* If somehow this increment is bigger than a HWI, we won't
+ be optimizing candidates that use it. And if the increment
+ has a count of zero, nothing will be done with it. */
+ if (!double_int_fits_in_shwi_p (incr_vec[i].incr)
+ || !incr_vec[i].count)
+ incr_vec[i].cost = COST_INFINITE;
+
+ /* Increments of 0, 1, and -1 are always profitable to replace,
+ because they always replace a multiply or add with an add or
+ copy, and may cause one or more existing instructions to go
+ dead. Exception: -1 can't be assumed to be profitable for
+ pointer addition. */
+ else if (incr == 0
+ || incr == 1
+ || (incr == -1
+ && (gimple_assign_rhs_code (first_dep->cand_stmt)
+ != POINTER_PLUS_EXPR)))
+ incr_vec[i].cost = COST_NEUTRAL;
+
+ /* For any other increment, if this is a multiply candidate, we
+ must introduce a temporary T and initialize it with
+ T_0 = stride * increment. When optimizing for speed, walk the
+ candidate tree to calculate the best cost reduction along any
+ path; if it offsets the fixed cost of inserting the initializer,
+ replacing the increment is profitable. When optimizing for
+ size, instead calculate the total cost reduction from replacing
+ all candidates with this increment. */
+ else if (first_dep->kind == CAND_MULT)
+ {
+ int cost = multiply_by_cost (incr, mode, speed);
+ /* Each replacement trades a multiply for an add. */
+ int repl_savings
+ = multiply_regs_cost (mode, speed) - add_regs_cost (mode, speed);
+
+ if (speed)
+ cost = lowest_cost_path (cost, repl_savings, first_dep,
+ incr_vec[i].incr);
+ else
+ cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr);
+
+ incr_vec[i].cost = cost;
+ }
+
+ /* If this is an add candidate, the initializer may already
+ exist, so only calculate the cost of the initializer if it
+ doesn't. We are replacing one add with another here, so the
+ known replacement savings is zero. We will account for removal
+ of dead instructions in lowest_cost_path or total_savings. */
+ else
+ {
+ int cost = 0;
+ if (!incr_vec[i].initializer)
+ cost = multiply_by_cost (incr, mode, speed);
+
+ if (speed)
+ cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr);
+ else
+ cost -= total_savings (0, first_dep, incr_vec[i].incr);
+
+ incr_vec[i].cost = cost;
+ }
+ }
+}
+
+/* Return the nearest common dominator of BB1 and BB2. If the blocks
+ are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
+ if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
+ return C2 in *WHERE; and if the NCD matches neither, return NULL in
+ *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
+
+static basic_block
+nearest_common_dominator_for_two_cands (basic_block bb1, basic_block bb2,
+ slsr_cand_t c1, slsr_cand_t c2,
+ slsr_cand_t *where)
+{
+ basic_block ncd;
+
+ /* A NULL block means "no constraint" — the other side wins
+ outright. */
+ if (!bb1)
+ {
+ *where = c2;
+ return bb2;
+ }
+
+ if (!bb2)
+ {
+ *where = c1;
+ return bb1;
+ }
+
+ ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
+
+ /* If both candidates are in the same block, the earlier
+ candidate wins. Candidate numbers are assigned in statement
+ order, so the smaller number is earlier. */
+ if (bb1 == ncd && bb2 == ncd)
+ {
+ gcc_assert (c1 && c2);
+ if (c1->cand_num < c2->cand_num)
+ *where = c1;
+ else
+ *where = c2;
+ }
+
+ /* Otherwise, if one of them produced a candidate in the
+ dominator, that one wins. */
+ else if (bb1 == ncd)
+ *where = c1;
+
+ else if (bb2 == ncd)
+ *where = c2;
+
+ /* If neither matches the dominator, neither wins. */
+ else
+ *where = NULL;
+
+ return ncd;
+}
+
+/* Consider all candidates in the tree rooted at C for which INCR
+ represents the required increment of C relative to its basis.
+ Find and return the basic block that most nearly dominates all
+ such candidates. If the returned block contains one or more of
+ the candidates, return the earliest candidate in the block in
+ *WHERE. */
+
+static basic_block
+nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr,
+ slsr_cand_t *where)
+{
+ basic_block sib_ncd = NULL, dep_ncd = NULL, ncd;
+ slsr_cand_t sib_where = NULL, dep_where = NULL, new_where;
+ double_int cand_incr = cand_increment (c);
+
+ /* First find the NCD of all siblings and dependents. */
+ if (c->sibling)
+ sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
+ incr, &sib_where);
+ if (c->dependent)
+ dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
+ incr, &dep_where);
+ /* Combine the two sub-results; a NULL NCD means no candidate with
+ increment INCR exists in that subtree. */
+ if (!sib_ncd && !dep_ncd)
+ {
+ new_where = NULL;
+ ncd = NULL;
+ }
+ else if (sib_ncd && !dep_ncd)
+ {
+ new_where = sib_where;
+ ncd = sib_ncd;
+ }
+ else if (dep_ncd && !sib_ncd)
+ {
+ new_where = dep_where;
+ ncd = dep_ncd;
+ }
+ else
+ ncd = nearest_common_dominator_for_two_cands (sib_ncd, dep_ncd,
+ sib_where, dep_where,
+ &new_where);
+
+ /* If the candidate's increment doesn't match the one we're interested
+ in, then the result depends only on siblings and dependents. */
+ if (!double_int_equal_p (cand_incr, incr))
+ {
+ *where = new_where;
+ return ncd;
+ }
+
+ /* Otherwise, compare this candidate with the result from all siblings
+ and dependents. */
+ ncd = nearest_common_dominator_for_two_cands (ncd, gimple_bb (c->cand_stmt),
+ new_where, c, where);
+ return ncd;
+}
+
+/* Return TRUE if the increment indexed by INDEX is profitable to
+ replace, i.e. its analyzed cost is at most COST_NEUTRAL. */
+
+static inline bool
+profitable_increment_p (unsigned index)
+{
+ return (incr_vec[index].cost <= COST_NEUTRAL);
+}
+
+/* For each profitable increment in the increment vector not equal to
+ 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
+ dominator of all statements in the candidate chain rooted at C
+ that require that increment, and insert an initializer
+ T_0 = stride * increment at that location. Record T_0 with the
+ increment record. */
+
+static void
+insert_initializers (slsr_cand_t c)
+{
+ unsigned i;
+ tree new_var = NULL_TREE;
+
+ for (i = 0; i < incr_vec_len; i++)
+ {
+ basic_block bb;
+ slsr_cand_t where = NULL;
+ gimple init_stmt;
+ tree stride_type, new_name, incr_tree;
+
+ /* Skip unprofitable increments and the special values -1, 0, 1,
+ which never need an initializer. */
+ if (!profitable_increment_p (i)
+ || double_int_one_p (incr_vec[i].incr)
+ || (double_int_minus_one_p (incr_vec[i].incr)
+ && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR)
+ || double_int_zero_p (incr_vec[i].incr))
+ continue;
+
+ /* We may have already identified an existing initializer that
+ will suffice. */
+ if (incr_vec[i].initializer)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Using existing initializer: ", dump_file);
+ print_gimple_stmt (dump_file,
+ SSA_NAME_DEF_STMT (incr_vec[i].initializer),
+ 0, 0);
+ }
+ continue;
+ }
+
+ /* Find the block that most closely dominates all candidates
+ with this increment. If there is at least one candidate in
+ that block, the earliest one will be returned in WHERE. */
+ bb = nearest_common_dominator_for_cands (c, incr_vec[i].incr, &where);
+
+ /* Create a new SSA name to hold the initializer's value. */
+ stride_type = TREE_TYPE (SSA_NAME_VAR (c->stride));
+
+ /* One temporary declaration is shared by all initializers
+ inserted for this chain. */
+ if (!new_var)
+ {
+ new_var = create_tmp_reg (stride_type, "slsr");
+ add_referenced_var (new_var);
+ }
+
+ new_name = make_ssa_name (new_var, NULL);
+ incr_vec[i].initializer = new_name;
+
+ /* Create the initializer and insert it in the latest possible
+ dominating position. */
+ incr_tree = double_int_to_tree (stride_type, incr_vec[i].incr);
+ init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name,
+ c->stride, incr_tree);
+ if (where)
+ {
+ gsi_insert_before (&where->cand_gsi, init_stmt, GSI_SAME_STMT);
+ gimple_set_location (init_stmt, gimple_location (where->cand_stmt));
+ }
+ else
+ {
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
+ gimple basis_stmt = lookup_cand (c->basis)->cand_stmt;
+
+ /* Don't insert after a control statement terminating BB. */
+ if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi)))
+ gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
+ else
+ gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT);
+
+ gimple_set_location (init_stmt, gimple_location (basis_stmt));
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Inserting initializer: ", dump_file);
+ print_gimple_stmt (dump_file, init_stmt, 0, 0);
+ }
+ }
+}
+
+/* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
+ type TO_TYPE, and insert it in front of the statement represented
+ by candidate C. Use *NEW_VAR to create the new SSA name; *NEW_VAR
+ is created lazily on first use and reused thereafter. Return
+ the new SSA name. */
+
+static tree
+introduce_cast_before_cand (slsr_cand_t c, tree to_type,
+ tree from_expr, tree *new_var)
+{
+ tree cast_lhs;
+ gimple cast_stmt;
+
+ if (!*new_var)
+ {
+ *new_var = create_tmp_reg (to_type, "slsr");
+ add_referenced_var (*new_var);
+ }
+
+ cast_lhs = make_ssa_name (*new_var, NULL);
+ cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs,
+ from_expr, NULL_TREE);
+ /* Attribute the new cast to the candidate's source location. */
+ gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
+ gsi_insert_before (&c->cand_gsi, cast_stmt, GSI_SAME_STMT);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs (" (Inserting: ", dump_file);
+ print_gimple_stmt (dump_file, cast_stmt, 0, 0);
+ fputs (")\n", dump_file);
+ }
+
+ return cast_lhs;
+}
+
+/* Replace the RHS of the statement represented by candidate C with
+ NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
+ leave C unchanged or just interchange its operands. The original
+ operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
+ If the replacement was made and we are doing a details dump,
+ return the revised statement, else NULL. */
+
+static gimple
+replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
+ enum tree_code old_code, tree old_rhs1, tree old_rhs2,
+ slsr_cand_t c)
+{
+ /* Rewrite only if the new RHS differs from the old one in either
+ operand order. */
+ if (new_code != old_code
+ || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
+ || !operand_equal_p (new_rhs2, old_rhs2, 0))
+ && (!operand_equal_p (new_rhs1, old_rhs2, 0)
+ || !operand_equal_p (new_rhs2, old_rhs1, 0))))
+ {
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, new_code,
+ new_rhs1, new_rhs2);
+ update_stmt (gsi_stmt (c->cand_gsi));
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ return gsi_stmt (c->cand_gsi);
+ }
+
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fputs (" (duplicate, not actually replacing)\n", dump_file);
+
+ return NULL;
+}
+
+/* Strength-reduce the statement represented by candidate C by replacing
+ it with an equivalent addition or subtraction. I is the index into
+ the increment vector identifying C's increment. NEW_VAR is used to
+ create a new SSA name if a cast needs to be introduced. */
+
+static void
+replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var)
+{
+ gimple stmt_to_print = NULL;
+ tree orig_rhs1, orig_rhs2, basis_name;
+ slsr_cand_t basis;
+ tree rhs2;
+ enum tree_code orig_code, repl_code;
+
+ orig_code = gimple_assign_rhs_code (c->cand_stmt);
+ orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
+ orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
+ basis = lookup_cand (c->basis);
+ basis_name = gimple_assign_lhs (basis->cand_stmt);
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fputs ("Replacing: ", dump_file);
+ print_gimple_stmt (dump_file, c->cand_stmt, 0, 0);
+ stmt_to_print = c->cand_stmt;
+ }
+
+ /* Pointer arithmetic must remain POINTER_PLUS_EXPR; everything
+ else becomes an integer add. */
+ if (orig_code == POINTER_PLUS_EXPR)
+ repl_code = POINTER_PLUS_EXPR;
+ else
+ repl_code = PLUS_EXPR;
+
+ /* If the increment has an initializer T_0, replace the candidate
+ statement with an add of the basis name and the initializer. */
+ if (incr_vec[i].initializer)
+ {
+ tree init_type = TREE_TYPE (SSA_NAME_VAR (incr_vec[i].initializer));
+ tree orig_type = TREE_TYPE (SSA_NAME_VAR (orig_rhs2));
+
+ if (types_compatible_p (orig_type, init_type))
+ rhs2 = incr_vec[i].initializer;
+ else
+ rhs2 = introduce_cast_before_cand (c, orig_type,
+ incr_vec[i].initializer,
+ new_var);
+
+ stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
+ orig_code, orig_rhs1, orig_rhs2,
+ c);
+ }
+
+ /* Otherwise, the increment is one of -1, 0, and 1. Replace
+ with a subtract of the stride from the basis name, a copy
+ from the basis name, or an add of the stride to the basis
+ name, respectively. It may be necessary to introduce a
+ cast (or reuse an existing cast). */
+ else if (double_int_one_p (incr_vec[i].incr))
+ {
+ tree stride_type = TREE_TYPE (SSA_NAME_VAR (c->stride));
+ tree orig_type = TREE_TYPE (SSA_NAME_VAR (orig_rhs2));
+
+ if (types_compatible_p (orig_type, stride_type))
+ rhs2 = c->stride;
+ else
+ rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
+
+ stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
+ orig_code, orig_rhs1, orig_rhs2,
+ c);
+ }
+
+ else if (double_int_minus_one_p (incr_vec[i].incr))
+ {
+ tree stride_type = TREE_TYPE (SSA_NAME_VAR (c->stride));
+ tree orig_type = TREE_TYPE (SSA_NAME_VAR (orig_rhs2));
+ /* analyze_increments never marks -1 profitable for pointer
+ addition, so a POINTER_PLUS_EXPR cannot reach here. */
+ gcc_assert (repl_code != POINTER_PLUS_EXPR);
+
+ if (types_compatible_p (orig_type, stride_type))
+ rhs2 = c->stride;
+ else
+ rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var);
+
+ /* MINUS_EXPR is not commutative, so unlike replace_rhs_if_not_dup
+ only the exact operand order counts as a duplicate. */
+ if (repl_code != MINUS_EXPR
+ || !operand_equal_p (basis_name, orig_rhs1, 0)
+ || !operand_equal_p (rhs2, orig_rhs2, 0))
+ {
+ gimple_assign_set_rhs_with_ops (&c->cand_gsi, MINUS_EXPR,
+ basis_name, rhs2);
+ update_stmt (gsi_stmt (c->cand_gsi));
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = gsi_stmt (c->cand_gsi);
+ }
+ else if (dump_file && (dump_flags & TDF_DETAILS))
+ fputs (" (duplicate, not actually replacing)\n", dump_file);
+ }
+
+ else if (double_int_zero_p (incr_vec[i].incr))
+ {
+ /* A zero increment means C equals its basis; emit a copy, or a
+ cast when the types differ. */
+ tree lhs = gimple_assign_lhs (c->cand_stmt);
+ tree lhs_type = TREE_TYPE (SSA_NAME_VAR (lhs));
+ tree basis_type = TREE_TYPE (SSA_NAME_VAR (basis_name));
+
+ if (types_compatible_p (lhs_type, basis_type))
+ {
+ gimple copy_stmt = gimple_build_assign (lhs, basis_name);
+ gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
+ gsi_replace (&c->cand_gsi, copy_stmt, false);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = copy_stmt;
+ }
+ else
+ {
+ gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs,
+ basis_name,
+ NULL_TREE);
+ gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
+ gsi_replace (&c->cand_gsi, cast_stmt, false);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ stmt_to_print = cast_stmt;
+ }
+ }
+ else
+ gcc_unreachable ();
+
+ if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
+ {
+ fputs ("With: ", dump_file);
+ print_gimple_stmt (dump_file, stmt_to_print, 0, 0);
+ fputs ("\n", dump_file);
+ }
+}
+
+/* For each candidate in the tree rooted at C, replace it with
+ an increment if such has been shown to be profitable. */
+
+static void
+replace_profitable_candidates (slsr_cand_t c)
+{
+ double_int increment = cand_increment (c);
+ unsigned i;
+ tree new_var = NULL;
+ enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
+
+ /* Find C's increment in the increment vector. Every increment in
+ the tree was recorded by record_increments, so the search must
+ succeed. */
+ for (i = 0;
+ i < incr_vec_len && !double_int_equal_p (increment, incr_vec[i].incr);
+ i++)
+ ;
+
+ gcc_assert (i < incr_vec_len);
+
+ /* Only process profitable increments. Nothing useful can be done
+ to a cast or copy. */
+ if (profitable_increment_p (i)
+ && orig_code != MODIFY_EXPR
+ && orig_code != NOP_EXPR)
+ replace_one_candidate (c, i, &new_var);
+
+ if (c->sibling)
+ replace_profitable_candidates (lookup_cand (c->sibling));
+
+ if (c->dependent)
+ replace_profitable_candidates (lookup_cand (c->dependent));
+}
+
+/* Analyze costs of related candidates in the candidate vector,
+   and make beneficial replacements.  This is the driver called
+   after all candidates have been collected by the dominator walk. */
+
+static void
+analyze_candidates_and_replace (void)
+{
+  unsigned i;
+  slsr_cand_t c;
+
+  /* Each candidate that has a null basis and a non-null
+     dependent is the root of a tree of related statements.
+     Analyze each tree to determine a subset of those
+     statements that can be replaced with maximum benefit. */
+  FOR_EACH_VEC_ELT (slsr_cand_t, cand_vec, i, c)
+    {
+      slsr_cand_t first_dep;
+
+      /* Skip candidates that are not roots of a dependency tree.  */
+      if (c->basis != 0 || c->dependent == 0)
+	continue;
+
+      /* If the candidate statement no longer belongs to a basic block,
+	 another interpretation of this statement has caused the
+	 statement to be replaced.  Don't worry about potential basis
+	 chains for such already-handled statements. */
+      if (!gimple_bb (c->cand_stmt))
+	continue;
+
+      if (dump_file && (dump_flags & TDF_DETAILS))
+	fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
+		 c->cand_num);
+
+      first_dep = lookup_cand (c->dependent);
+
+      /* If the common stride of all related candidates is a
+	 known constant, and none of these has a phi-dependence,
+	 then all replacements are considered profitable.
+	 Each replaces a multiply by a single add, with the
+	 possibility that a feeding add also goes dead as a
+	 result. */
+      if (unconditional_cands_with_known_stride_p (c))
+	replace_dependents (first_dep);
+
+      /* When the stride is an SSA name, it may still be profitable
+	 to replace some or all of the dependent candidates, depending
+	 on whether the introduced increments can be reused, or are
+	 less expensive to calculate than the replaced statements. */
+      else if (unconditional_cands_with_unknown_stride_p (c))
+	{
+	  /* Construct an array of increments for this candidate chain. */
+	  unsigned length = count_candidates (c);
+	  /* Element size rounded up to a multiple of 16 bytes.  Note
+	     that incr_vec is indexed with the normal sizeof (incr_info)
+	     stride (see replace_profitable_candidates), so the rounding
+	     only over-allocates; presumably for alignment -- TODO
+	     confirm whether the padding is still needed.  */
+	  unsigned info_size = (sizeof (incr_info) + 15) & ~15;
+	  enum machine_mode mode;
+	  bool speed;
+
+	  incr_vec = XNEWVAR (incr_info, length * info_size);
+	  incr_vec_len = 0;
+	  record_increments (c);
+
+	  /* Determine which increments are profitable to replace.
+	     Mode and speed are taken from the root candidate's lhs
+	     and block. */
+	  mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
+	  speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
+	  analyze_increments (first_dep, mode, speed);
+
+	  /* Insert initializers of the form T_0 = stride * increment
+	     for use in profitable replacements. */
+	  insert_initializers (first_dep);
+	  dump_incr_vec ();
+
+	  /* Perform the replacements, then release the per-chain
+	     increment vector. */
+	  replace_profitable_candidates (first_dep);
+	  free (incr_vec);
+	}
+
+      /* TODO: When conditional increments occur so that a
+	 candidate is dependent upon a phi-basis, the cost of
+	 introducing a temporary must be accounted for. */
+
+      /* TODO: Strength reduction candidates hidden in
+	 addressing expressions are not yet handled, and will
+	 have more complex cost functions. */
+    }
+}
+
+/* Main entry point for the straight-line strength reduction pass.
+   Sets up the candidate pools, hash tables, and loop/dominator
+   information, walks the CFG in predominator order to collect
+   candidates, performs the profitable replacements, and tears
+   everything down again.  Always returns 0 (no extra TODO flags).  */
+
+static unsigned
+execute_strength_reduction (void)
+{
+  struct dom_walk_data walk_data;
+
+  /* Create the allocation pool where candidates will reside. */
+  cand_pool = create_alloc_pool ("Strength reduction candidates",
+				 sizeof (slsr_cand), 128);
+
+  /* Allocate the candidate vector. */
+  cand_vec = VEC_alloc (slsr_cand_t, heap, 128);
+
+  /* Allocate the mapping from statements to candidate indices. */
+  stmt_cand_map = htab_create (500, stmt_cand_hash,
+			       stmt_cand_eq, stmt_cand_free);
+
+  /* Create the allocation pool where candidate chains will reside. */
+  chain_pool = create_alloc_pool ("Strength reduction chains",
+				  sizeof (cand_chain), 128);
+
+  /* Allocate the mapping from base names to candidate chains. */
+  base_cand_map = htab_create (500, base_cand_hash,
+			       base_cand_eq, base_cand_free);
+
+  /* Initialize the loop optimizer.  We need to detect flow across
+     back edges, and this gives us dominator information as well. */
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
+  /* Set up callbacks for the generic dominator tree walker. */
+  walk_data.dom_direction = CDI_DOMINATORS;
+  walk_data.initialize_block_local_data = NULL;
+  walk_data.before_dom_children = find_candidates_in_block;
+  walk_data.after_dom_children = NULL;
+  walk_data.global_data = NULL;
+  walk_data.block_local_data_size = 0;
+  init_walk_dominator_tree (&walk_data);
+
+  /* Walk the CFG in predominator order looking for strength reduction
+     candidates. */
+  walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR);
+
+  if (dump_file && (dump_flags & TDF_DETAILS))
+    {
+      dump_cand_vec ();
+      dump_cand_chains ();
+    }
+
+  /* Analyze costs and make appropriate replacements. */
+  analyze_candidates_and_replace ();
+
+  /* Free resources in the reverse order of their creation. */
+  fini_walk_dominator_tree (&walk_data);
+  loop_optimizer_finalize ();
+  htab_delete (base_cand_map);
+  free_alloc_pool (chain_pool);
+  htab_delete (stmt_cand_map);
+  VEC_free (slsr_cand_t, heap, cand_vec);
+  free_alloc_pool (cand_pool);
+
+  return 0;
+}
+
+/* Gate for the strength reduction pass: run whenever any
+   optimization is enabled (i.e. not at -O0).  */
+
+static bool
+gate_strength_reduction (void)
+{
+  return optimize > 0;
+}
+
+/* Pass descriptor for straight-line strength reduction, declared in
+   tree-pass.h and scheduled from passes.c (after DOM, before the
+   degenerate-PHI cleanup).  */
+
+struct gimple_opt_pass pass_strength_reduction =
+{
+ {
+  GIMPLE_PASS,
+  "slsr",				/* name */
+  gate_strength_reduction,		/* gate */
+  execute_strength_reduction,		/* execute */
+  NULL,					/* sub */
+  NULL,					/* next */
+  0,					/* static_pass_number */
+  TV_TREE_SLSR,				/* tv_id */
+  PROP_cfg | PROP_ssa,			/* properties_required */
+  0,					/* properties_provided */
+  0,					/* properties_destroyed */
+  0,					/* todo_flags_start */
+  TODO_dump_func | TODO_verify_ssa	/* todo_flags_finish */
+ }
+};
===================================================================
@@ -816,6 +816,11 @@ bool stmt_invariant_in_loop_p (struct loop *, gimp
bool multiplier_allowed_in_address_p (HOST_WIDE_INT, enum machine_mode,
addr_space_t);
unsigned multiply_by_cost (HOST_WIDE_INT, enum machine_mode, bool);
+unsigned add_regs_cost (enum machine_mode, bool);
+unsigned multiply_regs_cost (enum machine_mode, bool);
+unsigned add_const_cost (enum machine_mode, bool);
+unsigned extend_or_trunc_cost (tree, tree, bool);
+unsigned negate_cost (enum machine_mode, bool);
bool may_be_nonaddressable_p (tree expr);
/* In tree-ssa-threadupdate.c. */
===================================================================
@@ -1417,6 +1417,7 @@ OBJS = \
tree-ssa-reassoc.o \
tree-ssa-sccvn.o \
tree-ssa-sink.o \
+ tree-ssa-strength-reduction.o \
tree-ssa-strlen.o \
tree-ssa-structalias.o \
tree-ssa-tail-merge.o \
@@ -2422,6 +2423,10 @@ tree-ssa-sccvn.o : tree-ssa-sccvn.c $(TREE_FLOW_H)
alloc-pool.h $(BASIC_BLOCK_H) $(BITMAP_H) langhooks.h $(HASHTAB_H) $(GIMPLE_H) \
$(TREE_INLINE_H) tree-iterator.h tree-ssa-propagate.h tree-ssa-sccvn.h \
$(PARAMS_H) tree-pretty-print.h gimple-pretty-print.h gimple-fold.h
+tree-ssa-strength-reduction.o : tree-ssa-strength-reduction.c $(CONFIG_H) \
+ $(SYSTEM_H) coretypes.h $(TREE_H) $(GIMPLE_H) $(BASIC_BLOCK_H) \
+ $(TREE_PASS_H) $(TIMEVAR_H) $(CFGLOOP_H) tree-pretty-print.h \
+ gimple-pretty-print.h alloc-pool.h $(TREE_FLOW_H) domwalk.h
tree-vrp.o : tree-vrp.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) \
$(TREE_FLOW_H) $(TREE_PASS_H) $(TREE_DUMP_H) $(DIAGNOSTIC_H) $(GGC_H) \
$(BASIC_BLOCK_H) tree-ssa-propagate.h $(FLAGS_H) $(TREE_DUMP_H) \
===================================================================
@@ -1380,6 +1380,10 @@ init_optimization_passes (void)
NEXT_PASS (pass_reassoc);
NEXT_PASS (pass_vrp);
NEXT_PASS (pass_dominator);
+ /* Strength reduction is more effective after constant and
+ copy propagation provided by DOM, but produces dead code
+ that needs to be cleaned up by CD_DCE. */
+ NEXT_PASS (pass_strength_reduction);
/* The only const/copy propagation opportunities left after
DOM should be due to degenerate PHI nodes. So rather than
run the full propagators, run a specialized pass which