Message ID | 20101103145129.GQ29412@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Wed, 3 Nov 2010, Jakub Jelinek wrote: > Hi! > > This patch implements store sinking for > if (cond) > mem = ssa_name_or_invariant1; > else > mem = ssa_name_or_invariant2; > in cselim pass and fixes a few issues in cselim's cond_store_replacement. > Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk? Ok. Thanks, Richard. > 2010-11-03 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/46009 > * tree-ssa-phiopt.c (tree_ssa_phiopt_worker): Call > cond_if_else_store_replacement if bb1 and bb2 have the same > single successor. > (cond_store_replacement): Use gimple_assign_single_p, don't > check if rhs is SSA_NAME or invariant. Call release_defs for > assign. > (cond_if_else_store_replacement): New function. > > * gcc.dg/vect/pr46009.c: New function. > > --- gcc/tree-ssa-phiopt.c.jj 2010-11-01 09:07:23.000000000 +0100 > +++ gcc/tree-ssa-phiopt.c 2010-11-03 11:52:20.000000000 +0100 > @@ -1,6 +1,6 @@ > /* Optimization of PHI nodes by converting them into straightline code. > - Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation, > - Inc. > + Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 > + Free Software Foundation, Inc. > > This file is part of GCC. > > @@ -47,6 +47,7 @@ static bool abs_replacement (basic_block > edge, edge, gimple, tree, tree); > static bool cond_store_replacement (basic_block, basic_block, edge, edge, > struct pointer_set_t *); > +static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); > static struct pointer_set_t * get_non_trapping (void); > static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree); > > @@ -149,7 +150,7 @@ tree_ssa_phiopt (void) > bb0: > if (cond) goto bb2; else goto bb1; > bb1: > - *p = RHS > + *p = RHS; > bb2: > > with > @@ -160,10 +161,28 @@ tree_ssa_phiopt (void) > condtmp' = *p; > bb2: > condtmp = PHI <RHS, condtmp'> > - *p = condtmp > + *p = condtmp; > > This transformation can only be done under several constraints, > - documented below. */ > + documented below. It also replaces: > + > + bb0: > + if (cond) goto bb2; else goto bb1; > + bb1: > + *p = RHS1; > + goto bb3; > + bb2: > + *p = RHS2; > + bb3: > + > + with > + > + bb0: > + if (cond) goto bb3; else goto bb1; > + bb1: > + bb3: > + condtmp = PHI <RHS1, RHS2> > + *p = condtmp; */ > > static unsigned int > tree_ssa_cs_elim (void) > @@ -248,8 +267,23 @@ tree_ssa_phiopt_worker (bool do_store_el > e1 = e2; > e2 = e_tmp; > } > + else if (do_store_elim > + && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest) > + { > + basic_block bb3 = EDGE_SUCC (bb1, 0)->dest; > + > + if (!single_succ_p (bb1) > + || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0 > + || !single_succ_p (bb2) > + || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0 > + || EDGE_COUNT (bb3->preds) != 2) > + continue; > + if (cond_if_else_store_replacement (bb1, bb2, bb3)) > + cfgchanged = true; > + continue; > + } > else > - continue; > + continue; > > e1 = EDGE_SUCC (bb1, 0); > > @@ -1190,26 +1224,20 @@ cond_store_replacement (basic_block midd > gimple newphi, new_stmt; > gimple_stmt_iterator gsi; > source_location locus; > - enum tree_code code; > > /* Check if middle_bb contains of only one store. */ > if (!assign > - || gimple_code (assign) != GIMPLE_ASSIGN) > + || !gimple_assign_single_p (assign)) > return false; > > locus = gimple_location (assign); > lhs = gimple_assign_lhs (assign); > rhs = gimple_assign_rhs1 (assign); > if (TREE_CODE (lhs) != MEM_REF > - || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME) > - return false; > - > - /* RHS is either a single SSA_NAME or a constant of register type. */ > - code = gimple_assign_rhs_code (assign); > - if (get_gimple_rhs_class (code) != GIMPLE_SINGLE_RHS > - || (code != SSA_NAME && !is_gimple_min_invariant (rhs)) > + || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME > || !is_gimple_reg_type (TREE_TYPE (lhs))) > return false; > + > /* Prove that we can move the store down. We could also check > TREE_THIS_NOTRAP here, but in that case we also could move stores, > whose value is not available readily, which we want to avoid. */ > @@ -1221,6 +1249,7 @@ cond_store_replacement (basic_block midd > gsi = gsi_for_stmt (assign); > unlink_stmt_vdef (assign); > gsi_remove (&gsi, true); > + release_defs (assign); > > /* 2) Create a temporary where we can store the old content > of the memory touched by the store, if we need to. */ > @@ -1263,6 +1292,99 @@ cond_store_replacement (basic_block midd > return true; > } > > +/* Do the main work of conditional store replacement. We already know > + that the recognized pattern looks like so: > + > + split: > + if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) > + THEN_BB: > + X = Y; > + goto JOIN_BB; > + ELSE_BB: > + X = Z; > + fallthrough (edge E0) > + JOIN_BB: > + some more > + > + We check that THEN_BB and ELSE_BB contain only one store > + that the stores have a "simple" RHS. */ > + > +static bool > +cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, > + basic_block join_bb) > +{ > + gimple then_assign = last_and_only_stmt (then_bb); > + gimple else_assign = last_and_only_stmt (else_bb); > + tree lhs_base, lhs, then_rhs, else_rhs; > + source_location then_locus, else_locus; > + gimple_stmt_iterator gsi; > + gimple newphi, new_stmt; > + > + /* Check if then_bb and else_bb contain only one store each. */ > + if (then_assign == NULL > + || !gimple_assign_single_p (then_assign) > + || else_assign == NULL > + || !gimple_assign_single_p (else_assign)) > + return false; > + > + lhs = gimple_assign_lhs (then_assign); > + if (!is_gimple_reg_type (TREE_TYPE (lhs)) > + || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0)) > + return false; > + > + lhs_base = get_base_address (lhs); > + if (lhs_base == NULL_TREE > + || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF)) > + return false; > + > + then_rhs = gimple_assign_rhs1 (then_assign); > + else_rhs = gimple_assign_rhs1 (else_assign); > + then_locus = gimple_location (then_assign); > + else_locus = gimple_location (else_assign); > + > + /* Now we've checked the constraints, so do the transformation: > + 1) Remove the stores. */ > + gsi = gsi_for_stmt (then_assign); > + unlink_stmt_vdef (then_assign); > + gsi_remove (&gsi, true); > + release_defs (then_assign); > + > + gsi = gsi_for_stmt (else_assign); > + unlink_stmt_vdef (else_assign); > + gsi_remove (&gsi, true); > + release_defs (else_assign); > + > + /* 2) Create a temporary where we can store the old content > + of the memory touched by the store, if we need to. */ > + if (!condstoretemp || TREE_TYPE (lhs) != TREE_TYPE (condstoretemp)) > + { > + condstoretemp = create_tmp_reg (TREE_TYPE (lhs), "cstore"); > + get_var_ann (condstoretemp); > + } > + add_referenced_var (condstoretemp); > + > + /* 3) Create a PHI node at the join block, with one argument > + holding the old RHS, and the other holding the temporary > + where we stored the old memory contents. */ > + newphi = create_phi_node (condstoretemp, join_bb); > + add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus); > + add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus); > + > + new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); > + > + /* 4) Insert that PHI node. */ > + gsi = gsi_after_labels (join_bb); > + if (gsi_end_p (gsi)) > + { > + gsi = gsi_last_bb (join_bb); > + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); > + } > + else > + gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); > + > + return true; > +} > + > /* Always do these optimizations if we have SSA > trees to work on. */ > static bool > --- gcc/testsuite/gcc.dg/vect/pr46009.c.jj 2010-11-03 11:59:27.000000000 +0100 > +++ gcc/testsuite/gcc.dg/vect/pr46009.c 2010-11-03 12:05:20.000000000 +0100 > @@ -0,0 +1,74 @@ > +/* PR tree-optimization/46009 */ > +/* { dg-require-effective-target vect_int } */ > + > +#include "tree-vect.h" > + > +int a[1024] __attribute__((aligned)); > +int b[1024] __attribute__((aligned)); > +int c[1024] __attribute__((aligned)); > +int d[1024] __attribute__((aligned)); > +int e[1024] __attribute__((aligned)); > + > +void __attribute__((noinline)) > +foo (void) > +{ > + int i, g; > + for (i = 0; i < 1024; i++) > + { > + g = a[i] + b[i] + c[i] * d[i];; > + e[i] = g < 10 ? 1 : g; > + } > +} > + > +void __attribute__((noinline)) > +bar (void) > +{ > + int i, g; > + for (i = 0; i < 1024; i++) > + { > + g = a[i] + b[i] + c[i] * d[i];; > + if (g < 10) > + e[i] = 1; > + else > + e[i] = g; > + } > +} > + > +int > +main (void) > +{ > + int i; > + check_vect (); > + for (i = 0; i < 1024; i++) > + { > + asm volatile ("" : "+r" (i)); > + a[i] = i % 10; > + b[i] = i % 10; > + c[i] = 1; > + d[i] = -1; > + e[i] = -1; > + } > + foo (); > + for (i = 0; i < 1024; i++) > + { > + int g; > + asm volatile ("" : "+r" (i)); > + g = 2 * (i % 10) - 1; > + if (e[i] != (g < 10 ? 1 : g)) > + abort (); > + e[i] = -1; > + } > + bar (); > + for (i = 0; i < 1024; i++) > + { > + int g; > + asm volatile ("" : "+r" (i)); > + g = 2 * (i % 10) - 1; > + if (e[i] != (g < 10 ? 1 : g)) > + abort (); > + } > + return 0; > +} > + > +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ > +/* { dg-final { cleanup-tree-dump "vect" } } */ > > Jakub > >
--- gcc/tree-ssa-phiopt.c.jj 2010-11-01 09:07:23.000000000 +0100 +++ gcc/tree-ssa-phiopt.c 2010-11-03 11:52:20.000000000 +0100 @@ -1,6 +1,6 @@ /* Optimization of PHI nodes by converting them into straightline code. - Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation, - Inc. + Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 + Free Software Foundation, Inc. This file is part of GCC. @@ -47,6 +47,7 @@ static bool abs_replacement (basic_block edge, edge, gimple, tree, tree); static bool cond_store_replacement (basic_block, basic_block, edge, edge, struct pointer_set_t *); +static bool cond_if_else_store_replacement (basic_block, basic_block, basic_block); static struct pointer_set_t * get_non_trapping (void); static void replace_phi_edge_with_variable (basic_block, edge, gimple, tree); @@ -149,7 +150,7 @@ tree_ssa_phiopt (void) bb0: if (cond) goto bb2; else goto bb1; bb1: - *p = RHS + *p = RHS; bb2: with @@ -160,10 +161,28 @@ tree_ssa_phiopt (void) condtmp' = *p; bb2: condtmp = PHI <RHS, condtmp'> - *p = condtmp + *p = condtmp; This transformation can only be done under several constraints, - documented below. */ + documented below. It also replaces: + + bb0: + if (cond) goto bb2; else goto bb1; + bb1: + *p = RHS1; + goto bb3; + bb2: + *p = RHS2; + bb3: + + with + + bb0: + if (cond) goto bb3; else goto bb1; + bb1: + bb3: + condtmp = PHI <RHS1, RHS2> + *p = condtmp; */ static unsigned int tree_ssa_cs_elim (void) @@ -248,8 +267,23 @@ tree_ssa_phiopt_worker (bool do_store_el e1 = e2; e2 = e_tmp; } + else if (do_store_elim + && EDGE_SUCC (bb1, 0)->dest == EDGE_SUCC (bb2, 0)->dest) + { + basic_block bb3 = EDGE_SUCC (bb1, 0)->dest; + + if (!single_succ_p (bb1) + || (EDGE_SUCC (bb1, 0)->flags & EDGE_FALLTHRU) == 0 + || !single_succ_p (bb2) + || (EDGE_SUCC (bb2, 0)->flags & EDGE_FALLTHRU) == 0 + || EDGE_COUNT (bb3->preds) != 2) + continue; + if (cond_if_else_store_replacement (bb1, bb2, bb3)) + cfgchanged = true; + continue; + } else - continue; + continue; e1 = EDGE_SUCC (bb1, 0); @@ -1190,26 +1224,20 @@ cond_store_replacement (basic_block midd gimple newphi, new_stmt; gimple_stmt_iterator gsi; source_location locus; - enum tree_code code; /* Check if middle_bb contains of only one store. */ if (!assign - || gimple_code (assign) != GIMPLE_ASSIGN) + || !gimple_assign_single_p (assign)) return false; locus = gimple_location (assign); lhs = gimple_assign_lhs (assign); rhs = gimple_assign_rhs1 (assign); if (TREE_CODE (lhs) != MEM_REF - || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME) - return false; - - /* RHS is either a single SSA_NAME or a constant of register type. */ - code = gimple_assign_rhs_code (assign); - if (get_gimple_rhs_class (code) != GIMPLE_SINGLE_RHS - || (code != SSA_NAME && !is_gimple_min_invariant (rhs)) + || TREE_CODE (TREE_OPERAND (lhs, 0)) != SSA_NAME || !is_gimple_reg_type (TREE_TYPE (lhs))) return false; + /* Prove that we can move the store down. We could also check TREE_THIS_NOTRAP here, but in that case we also could move stores, whose value is not available readily, which we want to avoid. */ @@ -1221,6 +1249,7 @@ cond_store_replacement (basic_block midd gsi = gsi_for_stmt (assign); unlink_stmt_vdef (assign); gsi_remove (&gsi, true); + release_defs (assign); /* 2) Create a temporary where we can store the old content of the memory touched by the store, if we need to. */ @@ -1263,6 +1292,99 @@ cond_store_replacement (basic_block midd return true; } +/* Do the main work of conditional store replacement. We already know + that the recognized pattern looks like so: + + split: + if (cond) goto THEN_BB; else goto ELSE_BB (edge E1) + THEN_BB: + X = Y; + goto JOIN_BB; + ELSE_BB: + X = Z; + fallthrough (edge E0) + JOIN_BB: + some more + + We check that THEN_BB and ELSE_BB contain only one store + that the stores have a "simple" RHS. */ + +static bool +cond_if_else_store_replacement (basic_block then_bb, basic_block else_bb, + basic_block join_bb) +{ + gimple then_assign = last_and_only_stmt (then_bb); + gimple else_assign = last_and_only_stmt (else_bb); + tree lhs_base, lhs, then_rhs, else_rhs; + source_location then_locus, else_locus; + gimple_stmt_iterator gsi; + gimple newphi, new_stmt; + + /* Check if then_bb and else_bb contain only one store each. */ + if (then_assign == NULL + || !gimple_assign_single_p (then_assign) + || else_assign == NULL + || !gimple_assign_single_p (else_assign)) + return false; + + lhs = gimple_assign_lhs (then_assign); + if (!is_gimple_reg_type (TREE_TYPE (lhs)) + || !operand_equal_p (lhs, gimple_assign_lhs (else_assign), 0)) + return false; + + lhs_base = get_base_address (lhs); + if (lhs_base == NULL_TREE + || (!DECL_P (lhs_base) && TREE_CODE (lhs_base) != MEM_REF)) + return false; + + then_rhs = gimple_assign_rhs1 (then_assign); + else_rhs = gimple_assign_rhs1 (else_assign); + then_locus = gimple_location (then_assign); + else_locus = gimple_location (else_assign); + + /* Now we've checked the constraints, so do the transformation: + 1) Remove the stores. */ + gsi = gsi_for_stmt (then_assign); + unlink_stmt_vdef (then_assign); + gsi_remove (&gsi, true); + release_defs (then_assign); + + gsi = gsi_for_stmt (else_assign); + unlink_stmt_vdef (else_assign); + gsi_remove (&gsi, true); + release_defs (else_assign); + + /* 2) Create a temporary where we can store the old content + of the memory touched by the store, if we need to. */ + if (!condstoretemp || TREE_TYPE (lhs) != TREE_TYPE (condstoretemp)) + { + condstoretemp = create_tmp_reg (TREE_TYPE (lhs), "cstore"); + get_var_ann (condstoretemp); + } + add_referenced_var (condstoretemp); + + /* 3) Create a PHI node at the join block, with one argument + holding the old RHS, and the other holding the temporary + where we stored the old memory contents. */ + newphi = create_phi_node (condstoretemp, join_bb); + add_phi_arg (newphi, then_rhs, EDGE_SUCC (then_bb, 0), then_locus); + add_phi_arg (newphi, else_rhs, EDGE_SUCC (else_bb, 0), else_locus); + + new_stmt = gimple_build_assign (lhs, PHI_RESULT (newphi)); + + /* 4) Insert that PHI node. */ + gsi = gsi_after_labels (join_bb); + if (gsi_end_p (gsi)) + { + gsi = gsi_last_bb (join_bb); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + } + else + gsi_insert_before (&gsi, new_stmt, GSI_NEW_STMT); + + return true; +} + /* Always do these optimizations if we have SSA trees to work on. */ static bool --- gcc/testsuite/gcc.dg/vect/pr46009.c.jj 2010-11-03 11:59:27.000000000 +0100 +++ gcc/testsuite/gcc.dg/vect/pr46009.c 2010-11-03 12:05:20.000000000 +0100 @@ -0,0 +1,74 @@ +/* PR tree-optimization/46009 */ +/* { dg-require-effective-target vect_int } */ + +#include "tree-vect.h" + +int a[1024] __attribute__((aligned)); +int b[1024] __attribute__((aligned)); +int c[1024] __attribute__((aligned)); +int d[1024] __attribute__((aligned)); +int e[1024] __attribute__((aligned)); + +void __attribute__((noinline)) +foo (void) +{ + int i, g; + for (i = 0; i < 1024; i++) + { + g = a[i] + b[i] + c[i] * d[i];; + e[i] = g < 10 ? 1 : g; + } +} + +void __attribute__((noinline)) +bar (void) +{ + int i, g; + for (i = 0; i < 1024; i++) + { + g = a[i] + b[i] + c[i] * d[i];; + if (g < 10) + e[i] = 1; + else + e[i] = g; + } +} + +int +main (void) +{ + int i; + check_vect (); + for (i = 0; i < 1024; i++) + { + asm volatile ("" : "+r" (i)); + a[i] = i % 10; + b[i] = i % 10; + c[i] = 1; + d[i] = -1; + e[i] = -1; + } + foo (); + for (i = 0; i < 1024; i++) + { + int g; + asm volatile ("" : "+r" (i)); + g = 2 * (i % 10) - 1; + if (e[i] != (g < 10 ? 1 : g)) + abort (); + e[i] = -1; + } + bar (); + for (i = 0; i < 1024; i++) + { + int g; + asm volatile ("" : "+r" (i)); + g = 2 * (i % 10) - 1; + if (e[i] != (g < 10 ? 1 : g)) + abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */