Message ID | alpine.DEB.2.02.1208252133130.20769@stedding.saclay.inria.fr |
---|---|
State | New |
On Sat, Aug 25, 2012 at 10:54 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this patch (bootstrapped and regtested on x86_64) deals with the same issue
> as the one at:
>
> http://gcc.gnu.org/ml/gcc-patches/2012-08/msg00205.html
>
> that is combining a shuffle of a constructor into a constructor, but at the
> tree-ssa level. An advantage is that it works with any size of vectors (the
> RTL patch only handles size 2 IIRC). A drawback is that it only applies to
> __builtin_shuffle, not the builtins that the x86 front-end uses.
>
> Note that fold already knew this optimization (my thanks to whoever wrote
> it, it helped a lot), it just never got a chance to apply it.
>
> In the call to fold_ternary, I am not sure if TREE_TYPE(op0) is the right
> argument, I could also use the type of the lhs, I don't know if that would
> make any difference.
>
> While I am here, I would like to write a patch that converts w={v[1],v[0]}
> to a builtin_shuffle (under the same conditions where a builtin_shuffle is
> not lowered to a constructor of elements, obviously). Is forwprop still the
> right pass to add it, or is there another more relevant one?
>
> 2012-08-22  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
> 	* tree-ssa-forwprop.c (simplify_permutation): Handle CONSTRUCTOR.
>
> gcc/testsuite/
> 	* gcc.dg/tree-ssa/forwprop-20.c: New testcase.
>
> --
> Marc Glisse
> Index: testsuite/gcc.dg/tree-ssa/forwprop-20.c
> ===================================================================
> --- testsuite/gcc.dg/tree-ssa/forwprop-20.c	(revision 0)
> +++ testsuite/gcc.dg/tree-ssa/forwprop-20.c	(revision 0)
> @@ -0,0 +1,70 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target double64 } */
> +/* { dg-options "-O2 -fdump-tree-optimized" } */
> +
> +#include <stdint.h>
> +
> +/* All of these optimizations happen for unsupported vector modes as a
> +   consequence of the lowering pass.  We need to test with a vector mode
> +   that is supported by default on at least some architectures, or make
> +   the test target specific so we can pass a flag like -mavx.  */
> +
> +typedef double vecf __attribute__ ((vector_size (2 * sizeof (double))));
> +typedef int64_t veci __attribute__ ((vector_size (2 * sizeof (int64_t))));
> +
> +void f (double d, vecf* r)
> +{
> +  vecf x = { -d, 5 };
> +  vecf y = { 1, 4 };
> +  veci m = { 2, 0 };
> +  *r = __builtin_shuffle (x, y, m); // { 1, -d }
> +}
> +
> +void g (float d, vecf* r)
> +{
> +  vecf x = { d, 5 };
> +  vecf y = { 1, 4 };
> +  veci m = { 2, 1 };
> +  *r = __builtin_shuffle (x, y, m); // { 1, 5 }
> +}
> +
> +void h (double d, vecf* r)
> +{
> +  vecf x = { d + 1, 5 };
> +  vecf y = { 1 , 4 };
> +  veci m = { 2 , 0 };
> +  *r = __builtin_shuffle (y, x, m); // { d + 1, 1 }
> +}
> +
> +void i (float d, vecf* r)
> +{
> +  vecf x = { d, 5 };
> +  veci m = { 1, 0 };
> +  *r = __builtin_shuffle (x, m); // { 5, d }
> +}
> +
> +void j (vecf* r)
> +{
> +  vecf y = { 1, 2 };
> +  veci m = { 0, 0 };
> +  *r = __builtin_shuffle (y, m); // { 1, 1 }
> +}
> +
> +void k (vecf* r)
> +{
> +  vecf x = { 3, 4 };
> +  vecf y = { 1, 2 };
> +  veci m = { 3, 0 };
> +  *r = __builtin_shuffle (x, y, m); // { 2, 3 }
> +}
> +
> +void l (double d, vecf* r)
> +{
> +  vecf x = { -d, 5 };
> +  vecf y = { d, 4 };
> +  veci m = { 2, 0 };
> +  *r = __builtin_shuffle (x, y, m); // { d, -d }
> +}
> +
> +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
> +/* { dg-final { cleanup-tree-dump "optimized" } } */
>
> Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-20.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: tree-ssa-forwprop.c
> ===================================================================
> --- tree-ssa-forwprop.c	(revision 190666)
> +++ tree-ssa-forwprop.c	(working copy)
> @@ -2602,75 +2602,130 @@ is_combined_permutation_identity (tree m
>        if (j == i)
>          maybe_identity2 = false;
>        else if (j == i + nelts)
>          maybe_identity1 = false;
>        else
>          return 0;
>      }
>    return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0;
>  }
>
> -/* Combine two shuffles in a row.  Returns 1 if there were any changes
> -   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
> +/* Combine a shuffle with its arguments.  Returns 1 if there were any
> +   changes made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
>
>  static int
>  simplify_permutation (gimple_stmt_iterator *gsi)
>  {
>    gimple stmt = gsi_stmt (*gsi);
>    gimple def_stmt;
> -  tree op0, op1, op2, op3;
> -  enum tree_code code = gimple_assign_rhs_code (stmt);
> -  enum tree_code code2;
> +  tree op0, op1, op2, op3, arg0, arg1;
> +  enum tree_code code;
>
> -  gcc_checking_assert (code == VEC_PERM_EXPR);
> +  gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
>
>    op0 = gimple_assign_rhs1 (stmt);
>    op1 = gimple_assign_rhs2 (stmt);
>    op2 = gimple_assign_rhs3 (stmt);
>
> -  if (TREE_CODE (op0) != SSA_NAME)
> -    return 0;
> -
>    if (TREE_CODE (op2) != VECTOR_CST)
>      return 0;
>
> -  if (op0 != op1)
> -    return 0;
> +  if (TREE_CODE (op0) == VECTOR_CST)
> +    {
> +      code = VECTOR_CST;
> +      arg0 = op0;

You shouldn't need the VECTOR_CST handling - constant propagation should
already ensure properly simplified code here (and is the more canonical
place to handle this).

> +    }
> +  else if (TREE_CODE (op0) == SSA_NAME)
> +    {
> +      def_stmt = SSA_NAME_DEF_STMT (op0);
> +      if (!def_stmt || !is_gimple_assign (def_stmt)
> +          || !can_propagate_from (def_stmt))
> +        return 0;
>
> -  def_stmt = SSA_NAME_DEF_STMT (op0);
> -  if (!def_stmt || !is_gimple_assign (def_stmt)
> -      || !can_propagate_from (def_stmt))
> +      code = gimple_assign_rhs_code (def_stmt);
> +      arg0 = gimple_assign_rhs1 (def_stmt);
> +    }
> +  else
>      return 0;
>
> -  code2 = gimple_assign_rhs_code (def_stmt);
> -
>    /* Two consecutive shuffles.  */
> -  if (code2 == VEC_PERM_EXPR)
> +  if (code == VEC_PERM_EXPR)
>      {
>        tree orig;
>        int ident;
> +
> +      if (op0 != op1)
> +        return 0;
>        op3 = gimple_assign_rhs3 (def_stmt);
>        if (TREE_CODE (op3) != VECTOR_CST)
>          return 0;
>        ident = is_combined_permutation_identity (op3, op2);
>        if (!ident)
>          return 0;
>        orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
>                            : gimple_assign_rhs2 (def_stmt);
>        gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
>        gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
>        gimple_set_num_ops (stmt, 2);
>        update_stmt (stmt);
>        return remove_prop_source_from_use (op0) ? 2 : 1;
>      }
>
> -  return false;
> +  /* Shuffle of a constructor.  */
> +  else if (code == CONSTRUCTOR || code == VECTOR_CST)
> +    {
> +      tree opt;
> +      bool ret = false;
> +      if (op0 != op1)
> +        {
> +          if (TREE_CODE (op1) == VECTOR_CST)
> +            arg1 = op1;
> +          else if (TREE_CODE (op1) == SSA_NAME)
> +            {
> +              enum tree_code code2;
> +              gimple def_stmt2 = SSA_NAME_DEF_STMT (op1);
> +              if (!def_stmt2 || !is_gimple_assign (def_stmt2)
> +                  || !can_propagate_from (def_stmt2))
> +                return 0;
> +
> +              code2 = gimple_assign_rhs_code (def_stmt2);
> +              if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
> +                return 0;
> +              arg1 = gimple_assign_rhs1 (def_stmt2);
> +            }
> +          else
> +            return 0;
> +
> +          if ((TREE_CODE (op0) == SSA_NAME && !has_single_use (op0))
> +              || (TREE_CODE (op1) == SSA_NAME && !has_single_use (op1)))
> +            return 0;

You do work above and then bail late here.  Always do early exits early
to reduce useless compile-time.

> +        }
> +      else
> +        {
> +          /* Already used twice in this statement.  */
> +          if (TREE_CODE (op0) == SSA_NAME && num_imm_uses (op0) > 2)
> +            return 0;
> +          arg1 = arg0;
> +        }
> +      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
> +      if (!opt)
> +        return 0;
> +      gimple_assign_set_rhs_from_tree (gsi, opt);

You need to verify that fold_ternary returns something that is valid GIMPLE.
fold () in general happily returns trees that are in need of
re-gimplification.  You expect a CONSTRUCTOR or VECTOR_CST here, so you
should check for that.

Richard.

> +      update_stmt (gsi_stmt (*gsi));
> +      if (TREE_CODE (op0) == SSA_NAME)
> +        ret = remove_prop_source_from_use (op0);
> +      if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
> +        ret |= remove_prop_source_from_use (op1);
> +      return ret ? 2 : 1;
> +    }
> +
> +  return 0;
>  }
>
>  /* Main entry point for the forward propagation and statement combine
>     optimizer.  */
>
>  static unsigned int
>  ssa_forward_propagate_and_combine (void)
>  {
>    basic_block bb;
>    unsigned int todoflags = 0;
>
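For the side question Marc raises in the mail quoted above, a minimal sketch of
the element-swap pattern and the shuffle it could be converted to, reusing the
vecf/veci typedefs from the testcase (the function names here are illustrative
only; no such follow-up patch is posted in this thread):

#include <stdint.h>

typedef double vecf __attribute__ ((vector_size (2 * sizeof (double))));
typedef int64_t veci __attribute__ ((vector_size (2 * sizeof (int64_t))));

/* Written as a constructor of element reads...  */
vecf swap_ctor (vecf v)
{
  vecf w = { v[1], v[0] };
  return w;
}

/* ...and as the equivalent single-operand shuffle it could become.  */
vecf swap_shuffle (vecf v)
{
  veci m = { 1, 0 };
  return __builtin_shuffle (v, m);
}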
On Mon, 3 Sep 2012, Richard Guenther wrote:
> You shouldn't need the VECTOR_CST handling - constant propagation should
> already ensure properly simplified code here (and is the more canonical
> place to handle this).

IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
(and because it wasn't too hard). If I remove it (I can), I guess some of
the testcases won't work anymore.

> You do work above and then bail late here.  Always do early exits early
> to reduce useless compile-time.

Ok.

>> +      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
>> +      if (!opt)
>> +        return 0;
>> +      gimple_assign_set_rhs_from_tree (gsi, opt);
>
> You need to verify that fold_ternary returns something that is valid GIMPLE.
> fold () in general happily returns trees that are in need of
> re-gimplification.  You expect a CONSTRUCTOR or VECTOR_CST here, so you
> should check for that.

Ok.

Thank you for the reviews,
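For concreteness, the check being agreed to here could look roughly like the
following, reusing the variable names from the patch above (this is a sketch
of Richard's suggestion, not necessarily the final committed form):

      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
      /* fold_ternary may hand back a tree that still needs gimplification;
         only accept a result that is valid as a GIMPLE rhs here.  */
      if (!opt
          || (TREE_CODE (opt) != CONSTRUCTOR && TREE_CODE (opt) != VECTOR_CST))
        return 0;
      gimple_assign_set_rhs_from_tree (gsi, opt);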
On Mon, Sep 3, 2012 at 4:00 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 3 Sep 2012, Richard Guenther wrote:
>
>> You shouldn't need the VECTOR_CST handling - constant propagation should
>> already ensure properly simplified code here (and is the more canonical
>> place to handle this).
>
> IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
> (and because it wasn't too hard). If I remove it (I can), I guess some of
> the testcases won't work anymore.

I see.  If you still have a testcase can you look if CCP does not do
something it should?

>> You do work above and then bail late here.  Always do early exits early
>> to reduce useless compile-time.
>
> Ok.
>
>>> +      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
>>> +      if (!opt)
>>> +        return 0;
>>> +      gimple_assign_set_rhs_from_tree (gsi, opt);
>>
>> You need to verify that fold_ternary returns something that is valid GIMPLE.
>> fold () in general happily returns trees that are in need of
>> re-gimplification.  You expect a CONSTRUCTOR or VECTOR_CST here, so you
>> should check for that.
>
> Ok.
>
> Thank you for the reviews,
>
> --
> Marc Glisse
On Mon, 3 Sep 2012, Richard Guenther wrote:
> On Mon, Sep 3, 2012 at 4:00 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Mon, 3 Sep 2012, Richard Guenther wrote:
>>
>>> You shouldn't need the VECTOR_CST handling - constant propagation should
>>> already ensure properly simplified code here (and is the more canonical
>>> place to handle this).
>>
>> IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
>> (and because it wasn't too hard). If I remove it (I can), I guess some of
>> the testcases won't work anymore.
>
> I see.  If you still have a testcase can you look if CCP does not do
> something it should?

I think CCP is working fine; the fold_ternary patch you approved today tests
some of that (without that patch, sometimes ccp1 does half the work and fre1
finishes it, and since forwprop1 is before fre1, I hit that case there). Is
there a particular scenario you have in mind that might not be handled?

Here I was concerned with:

x={a,b};   // constructor
y={18,42}; // vector_cst
m={0,3};
__builtin_shuffle(x,y,m) // should be {a,42}
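Spelled out in the style of forwprop-20.c, the scenario above would be
something like the function below (this particular function is illustrative
and is not part of the testcase); it shuffles a CONSTRUCTOR operand together
with a VECTOR_CST operand:

#include <stdint.h>

typedef double vecf __attribute__ ((vector_size (2 * sizeof (double))));
typedef int64_t veci __attribute__ ((vector_size (2 * sizeof (int64_t))));

void mixed (double a, double b, vecf *r)
{
  vecf x = { a, b };    /* lowered to a CONSTRUCTOR of non-constant elements */
  vecf y = { 18, 42 };  /* lowered to a VECTOR_CST */
  veci m = { 0, 3 };    /* element 0 of x, element 1 of y */
  *r = __builtin_shuffle (x, y, m);  /* expected to fold to { a, 42 } */
}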
On Mon, Sep 3, 2012 at 5:50 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 3 Sep 2012, Richard Guenther wrote:
>
>> On Mon, Sep 3, 2012 at 4:00 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>> On Mon, 3 Sep 2012, Richard Guenther wrote:
>>>
>>>> You shouldn't need the VECTOR_CST handling - constant propagation should
>>>> already ensure properly simplified code here (and is the more canonical
>>>> place to handle this).
>>>
>>> IIRC, I added VECTOR_CST because of mixed constructor/vector_cst shuffles
>>> (and because it wasn't too hard). If I remove it (I can), I guess some of
>>> the testcases won't work anymore.
>>
>> I see.  If you still have a testcase can you look if CCP does not do
>> something it should?
>
> I think CCP is working fine; the fold_ternary patch you approved today tests
> some of that (without that patch, sometimes ccp1 does half the work and fre1
> finishes it, and since forwprop1 is before fre1, I hit that case there). Is
> there a particular scenario you have in mind that might not be handled?

No.  In theory it should be handled just fine via
gimple_fold_stmt_to_constant_1 dispatching to fold_ternary.

Richard.

> Here I was concerned with:
>
> x={a,b};   // constructor
> y={18,42}; // vector_cst
> m={0,3};
> __builtin_shuffle(x,y,m) // should be {a,42}
>
> --
> Marc Glisse
Index: testsuite/gcc.dg/tree-ssa/forwprop-20.c
===================================================================
--- testsuite/gcc.dg/tree-ssa/forwprop-20.c	(revision 0)
+++ testsuite/gcc.dg/tree-ssa/forwprop-20.c	(revision 0)
@@ -0,0 +1,70 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target double64 } */
+/* { dg-options "-O2 -fdump-tree-optimized" } */
+
+#include <stdint.h>
+
+/* All of these optimizations happen for unsupported vector modes as a
+   consequence of the lowering pass.  We need to test with a vector mode
+   that is supported by default on at least some architectures, or make
+   the test target specific so we can pass a flag like -mavx.  */
+
+typedef double vecf __attribute__ ((vector_size (2 * sizeof (double))));
+typedef int64_t veci __attribute__ ((vector_size (2 * sizeof (int64_t))));
+
+void f (double d, vecf* r)
+{
+  vecf x = { -d, 5 };
+  vecf y = { 1, 4 };
+  veci m = { 2, 0 };
+  *r = __builtin_shuffle (x, y, m); // { 1, -d }
+}
+
+void g (float d, vecf* r)
+{
+  vecf x = { d, 5 };
+  vecf y = { 1, 4 };
+  veci m = { 2, 1 };
+  *r = __builtin_shuffle (x, y, m); // { 1, 5 }
+}
+
+void h (double d, vecf* r)
+{
+  vecf x = { d + 1, 5 };
+  vecf y = { 1 , 4 };
+  veci m = { 2 , 0 };
+  *r = __builtin_shuffle (y, x, m); // { d + 1, 1 }
+}
+
+void i (float d, vecf* r)
+{
+  vecf x = { d, 5 };
+  veci m = { 1, 0 };
+  *r = __builtin_shuffle (x, m); // { 5, d }
+}
+
+void j (vecf* r)
+{
+  vecf y = { 1, 2 };
+  veci m = { 0, 0 };
+  *r = __builtin_shuffle (y, m); // { 1, 1 }
+}
+
+void k (vecf* r)
+{
+  vecf x = { 3, 4 };
+  vecf y = { 1, 2 };
+  veci m = { 3, 0 };
+  *r = __builtin_shuffle (x, y, m); // { 2, 3 }
+}
+
+void l (double d, vecf* r)
+{
+  vecf x = { -d, 5 };
+  vecf y = { d, 4 };
+  veci m = { 2, 0 };
+  *r = __builtin_shuffle (x, y, m); // { d, -d }
+}
+
+/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */
+/* { dg-final { cleanup-tree-dump "optimized" } } */

Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-20.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: tree-ssa-forwprop.c
===================================================================
--- tree-ssa-forwprop.c	(revision 190666)
+++ tree-ssa-forwprop.c	(working copy)
@@ -2602,75 +2602,130 @@ is_combined_permutation_identity (tree m
       if (j == i)
         maybe_identity2 = false;
       else if (j == i + nelts)
         maybe_identity1 = false;
       else
         return 0;
     }
   return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0;
 }
 
-/* Combine two shuffles in a row.  Returns 1 if there were any changes
-   made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
+/* Combine a shuffle with its arguments.  Returns 1 if there were any
+   changes made, 2 if cfg-cleanup needs to run.  Else it returns 0.  */
 
 static int
 simplify_permutation (gimple_stmt_iterator *gsi)
 {
   gimple stmt = gsi_stmt (*gsi);
   gimple def_stmt;
-  tree op0, op1, op2, op3;
-  enum tree_code code = gimple_assign_rhs_code (stmt);
-  enum tree_code code2;
+  tree op0, op1, op2, op3, arg0, arg1;
+  enum tree_code code;
 
-  gcc_checking_assert (code == VEC_PERM_EXPR);
+  gcc_checking_assert (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR);
 
   op0 = gimple_assign_rhs1 (stmt);
   op1 = gimple_assign_rhs2 (stmt);
   op2 = gimple_assign_rhs3 (stmt);
 
-  if (TREE_CODE (op0) != SSA_NAME)
-    return 0;
-
   if (TREE_CODE (op2) != VECTOR_CST)
     return 0;
 
-  if (op0 != op1)
-    return 0;
+  if (TREE_CODE (op0) == VECTOR_CST)
+    {
+      code = VECTOR_CST;
+      arg0 = op0;
+    }
+  else if (TREE_CODE (op0) == SSA_NAME)
+    {
+      def_stmt = SSA_NAME_DEF_STMT (op0);
+      if (!def_stmt || !is_gimple_assign (def_stmt)
+          || !can_propagate_from (def_stmt))
+        return 0;
 
-  def_stmt = SSA_NAME_DEF_STMT (op0);
-  if (!def_stmt || !is_gimple_assign (def_stmt)
-      || !can_propagate_from (def_stmt))
+      code = gimple_assign_rhs_code (def_stmt);
+      arg0 = gimple_assign_rhs1 (def_stmt);
+    }
+  else
     return 0;
 
-  code2 = gimple_assign_rhs_code (def_stmt);
-
   /* Two consecutive shuffles.  */
-  if (code2 == VEC_PERM_EXPR)
+  if (code == VEC_PERM_EXPR)
     {
       tree orig;
       int ident;
+
+      if (op0 != op1)
+        return 0;
       op3 = gimple_assign_rhs3 (def_stmt);
       if (TREE_CODE (op3) != VECTOR_CST)
         return 0;
       ident = is_combined_permutation_identity (op3, op2);
       if (!ident)
         return 0;
       orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt)
                           : gimple_assign_rhs2 (def_stmt);
       gimple_assign_set_rhs1 (stmt, unshare_expr (orig));
       gimple_assign_set_rhs_code (stmt, TREE_CODE (orig));
       gimple_set_num_ops (stmt, 2);
       update_stmt (stmt);
       return remove_prop_source_from_use (op0) ? 2 : 1;
     }
 
-  return false;
+  /* Shuffle of a constructor.  */
+  else if (code == CONSTRUCTOR || code == VECTOR_CST)
+    {
+      tree opt;
+      bool ret = false;
+      if (op0 != op1)
+        {
+          if (TREE_CODE (op1) == VECTOR_CST)
+            arg1 = op1;
+          else if (TREE_CODE (op1) == SSA_NAME)
+            {
+              enum tree_code code2;
+              gimple def_stmt2 = SSA_NAME_DEF_STMT (op1);
+              if (!def_stmt2 || !is_gimple_assign (def_stmt2)
+                  || !can_propagate_from (def_stmt2))
+                return 0;
+
+              code2 = gimple_assign_rhs_code (def_stmt2);
+              if (code2 != CONSTRUCTOR && code2 != VECTOR_CST)
+                return 0;
+              arg1 = gimple_assign_rhs1 (def_stmt2);
+            }
+          else
+            return 0;
+
+          if ((TREE_CODE (op0) == SSA_NAME && !has_single_use (op0))
+              || (TREE_CODE (op1) == SSA_NAME && !has_single_use (op1)))
+            return 0;
+        }
+      else
+        {
+          /* Already used twice in this statement.  */
+          if (TREE_CODE (op0) == SSA_NAME && num_imm_uses (op0) > 2)
+            return 0;
+          arg1 = arg0;
+        }
+      opt = fold_ternary (VEC_PERM_EXPR, TREE_TYPE(op0), arg0, arg1, op2);
+      if (!opt)
+        return 0;
+      gimple_assign_set_rhs_from_tree (gsi, opt);
+      update_stmt (gsi_stmt (*gsi));
+      if (TREE_CODE (op0) == SSA_NAME)
+        ret = remove_prop_source_from_use (op0);
+      if (op0 != op1 && TREE_CODE (op1) == SSA_NAME)
+        ret |= remove_prop_source_from_use (op1);
+      return ret ? 2 : 1;
+    }
+
+  return 0;
 }
 
 /* Main entry point for the forward propagation and statement combine
    optimizer.  */
 
 static unsigned int
 ssa_forward_propagate_and_combine (void)
 {
   basic_block bb;
   unsigned int todoflags = 0;