Message ID | alpine.DEB.2.02.1209012034540.14982@stedding.saclay.inria.fr |
---|---|
State | New |
Headers | show |
On Sat, Sep 1, 2012 at 8:54 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > Hello, > > this patch makes it so that instead of taking the k-th element of a shuffle > of V, we directly take the k'-th element of V. > > Note that I am *not* checking that the shuffle is only used once. There can > be some circumstances where this optimization will make us miss other > opportunities later, but restricting it to the single use case would make it > much less useful. > > This is also my first contact with BIT_FIELD_REF, I may have missed some > properties of that tree. > > bootstrap+testsuite ok. > > 2012-09-01 Marc Glisse <marc.glisse@inria.fr> > > gcc/ > * tree-ssa-forwprop.c (simplify_bitfield): New function. > (ssa_forward_propagate_and_combine): Call it. > > gcc/testsuite/ > * gcc.dg/tree-ssa/forwprop-21.c: New testcase. > > (-21 because I have another patch pending review that adds a -20 testcase) > (simplify_bitfield appears before simplify_permutation to minimize conflicts > with that same patch, I can put it back in order when I commit if you > prefer) > > -- > Marc Glisse > Index: testsuite/gcc.dg/tree-ssa/forwprop-21.c > =================================================================== > --- testsuite/gcc.dg/tree-ssa/forwprop-21.c (revision 0) > +++ testsuite/gcc.dg/tree-ssa/forwprop-21.c (revision 0) > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-optimized" } */ > +typedef int v4si __attribute__ ((vector_size (4 * sizeof(int)))); > + > +int > +test (v4si *x, v4si *y) > +{ > + v4si m = { 2, 3, 6, 5 }; > + v4si z = __builtin_shuffle (*x, *y, m); > + return z[2]; > +} > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ > +/* { dg-final { cleanup-tree-dump "optimized" } } */ > > Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-21.c > ___________________________________________________________________ > Added: svn:eol-style > + native > Added: svn:keywords > + Author Date Id Revision URL > > Index: tree-ssa-forwprop.c > =================================================================== > --- tree-ssa-forwprop.c (revision 190847) > +++ tree-ssa-forwprop.c (working copy) > @@ -2570,20 +2570,88 @@ combine_conversions (gimple_stmt_iterato > gimple_assign_set_rhs_code (stmt, CONVERT_EXPR); > update_stmt (stmt); > return remove_prop_source_from_use (op0) ? 2 : 1; > } > } > } > > return 0; > } > > +/* Combine an element access with a shuffle. Returns true if there were > + any changes made, else it returns false. */ > + > +static bool > +simplify_bitfield (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op, op0, op1, op2; > + unsigned idx, n, size; > + enum tree_code code; > + > + op = gimple_assign_rhs1 (stmt); > + gcc_checking_assert (TREE_CODE (op) == BIT_FIELD_REF); > + > + op0 = TREE_OPERAND (op, 0); > + op1 = TREE_OPERAND (op, 1); > + op2 = TREE_OPERAND (op, 2); > + > + if (TREE_CODE (TREE_TYPE (op0)) != VECTOR_TYPE) > + return false; > + > + size = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (op0)))); > + n = TREE_INT_CST_LOW (op1) / size; > + idx = TREE_INT_CST_LOW (op2) / size; > + > + if (n != 1) > + return false; > + > + if (TREE_CODE (op0) != SSA_NAME) > + return false; Please do the early outs where you compute the arguments. Thus, right after getting op0 in this case or right after computing n for the n != 1 check. I think you need to verify that the type of 'op' is actually the element type of op0. The BIT_FIELD_REF can happily access elements two and three of { 1, 2, 3, 4 } as a long for example. See the BIT_FIELD_REF foldings in fold-const.c. > + def_stmt = SSA_NAME_DEF_STMT (op0); > + if (!def_stmt || !is_gimple_assign (def_stmt) > + || !can_propagate_from (def_stmt)) > + return false; > + > + code = gimple_assign_rhs_code (def_stmt); > + > + if (code == VEC_PERM_EXPR) > + { > + tree p, m, index, tem; > + unsigned nelts; > + m = gimple_assign_rhs3 (def_stmt); > + if (TREE_CODE (m) != VECTOR_CST) > + return false; > + nelts = VECTOR_CST_NELTS (m); > + idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx)); > + idx %= 2 * nelts; > + if (idx < nelts) > + { > + p = gimple_assign_rhs1 (def_stmt); > + } > + else > + { > + p = gimple_assign_rhs2 (def_stmt); > + idx -= nelts; > + } > + index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size); > + tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index); This shouldn't simplify, so you can use build3 instead. Please also add handling of code == CONSTRUCTOR. Thanks, Richard. > + gimple_assign_set_rhs1 (stmt, tem); > + update_stmt (stmt); > + return true; > + } > + > + return false; > +} > + > /* Determine whether applying the 2 permutations (mask1 then mask2) > gives back one of the input. */ > > static int > is_combined_permutation_identity (tree mask1, tree mask2) > { > tree mask; > unsigned int nelts, i, j; > bool maybe_identity1 = true; > bool maybe_identity2 = true; > @@ -2828,20 +2896,22 @@ ssa_forward_propagate_and_combine (void) > cfg_changed = true; > changed = did_something != 0; > } > else if (code == VEC_PERM_EXPR) > { > int did_something = simplify_permutation (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > + else if (code == BIT_FIELD_REF) > + changed = simplify_bitfield (&gsi); > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; >
On Mon, 3 Sep 2012, Richard Guenther wrote: > Please do the early outs where you compute the arguments. Thus, right > after getting op0 in this case or right after computing n for the n != 1 > check. Ok. > I think you need to verify that the type of 'op' is actually the element type > of op0. The BIT_FIELD_REF can happily access elements two and three > of { 1, 2, 3, 4 } as a long for example. Indeed I missed that. > See the BIT_FIELD_REF foldings in fold-const.c. That's what I was looking at (picked the same variable names size, idx, n) but I forgot that test :-( >> + if (code == VEC_PERM_EXPR) >> + { >> + tree p, m, index, tem; >> + unsigned nelts; >> + m = gimple_assign_rhs3 (def_stmt); >> + if (TREE_CODE (m) != VECTOR_CST) >> + return false; >> + nelts = VECTOR_CST_NELTS (m); >> + idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx)); >> + idx %= 2 * nelts; >> + if (idx < nelts) >> + { >> + p = gimple_assign_rhs1 (def_stmt); >> + } >> + else >> + { >> + p = gimple_assign_rhs2 (def_stmt); >> + idx -= nelts; >> + } >> + index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size); >> + tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index); > > This shouldn't simplify, so you can use build3 instead. I think that it is possible for p to be a VECTOR_CST, if the shuffle involves one constant and one non-constant vectors, no? Now that I look at this line, I wonder if I am missing some unshare_expr for p and/or op1. > Please also add handling of code == CONSTRUCTOR. The cases I tried were already handled by fre1. I can add code for constructor, but I'll need to look for a testcase first. Can that go to a different patch?
On Mon, Sep 3, 2012 at 3:39 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > On Mon, 3 Sep 2012, Richard Guenther wrote: > >> Please do the early outs where you compute the arguments. Thus, right >> after getting op0 in this case or right after computing n for the n != 1 >> check. > > > Ok. > >> I think you need to verify that the type of 'op' is actually the element >> type >> of op0. The BIT_FIELD_REF can happily access elements two and three >> of { 1, 2, 3, 4 } as a long for example. > > > Indeed I missed that. > > >> See the BIT_FIELD_REF foldings in fold-const.c. > > > That's what I was looking at (picked the same variable names size, idx, n) > but I forgot that test :-( > > >>> + if (code == VEC_PERM_EXPR) >>> + { >>> + tree p, m, index, tem; >>> + unsigned nelts; >>> + m = gimple_assign_rhs3 (def_stmt); >>> + if (TREE_CODE (m) != VECTOR_CST) >>> + return false; >>> + nelts = VECTOR_CST_NELTS (m); >>> + idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx)); >>> + idx %= 2 * nelts; >>> + if (idx < nelts) >>> + { >>> + p = gimple_assign_rhs1 (def_stmt); >>> + } >>> + else >>> + { >>> + p = gimple_assign_rhs2 (def_stmt); >>> + idx -= nelts; >>> + } >>> + index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size); >>> + tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index); >> >> >> This shouldn't simplify, so you can use build3 instead. > > > I think that it is possible for p to be a VECTOR_CST, if the shuffle > involves one constant and one non-constant vectors, no? Well, constant propagation should have handled it ... If you use fold_build3 you need to check that the result is in expected form (a is_gimple_invariant or an SSA_NAME). > Now that I look at this line, I wonder if I am missing some unshare_expr for > p and/or op1. If either is a CONSTRUCTOR and its def stmt is not removed and it survives into tem then yes ... >> Please also add handling of code == CONSTRUCTOR. > > > The cases I tried were already handled by fre1. I can add code for > constructor, but I'll need to look for a testcase first. Can that go to a > different patch? Yes. Thanks, Richard. > -- > Marc Glisse
On Mon, 3 Sep 2012, Richard Guenther wrote: >>>> + if (code == VEC_PERM_EXPR) >>>> + { >>>> + tree p, m, index, tem; >>>> + unsigned nelts; >>>> + m = gimple_assign_rhs3 (def_stmt); >>>> + if (TREE_CODE (m) != VECTOR_CST) >>>> + return false; >>>> + nelts = VECTOR_CST_NELTS (m); >>>> + idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx)); >>>> + idx %= 2 * nelts; >>>> + if (idx < nelts) >>>> + { >>>> + p = gimple_assign_rhs1 (def_stmt); >>>> + } >>>> + else >>>> + { >>>> + p = gimple_assign_rhs2 (def_stmt); >>>> + idx -= nelts; >>>> + } >>>> + index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size); >>>> + tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index); >>> >>> >>> This shouldn't simplify, so you can use build3 instead. >> >> >> I think that it is possible for p to be a VECTOR_CST, if the shuffle >> involves one constant and one non-constant vectors, no? > > Well, constant propagation should have handled it ... When it sees __builtin_shuffle(cst1,var,cst2)[cst3], CCP should basically do the same thing I am doing here, in the hope that the element will be part of cst1 instead of var? What if builtin_shuffle takes 2 constructors, one of which contains at least one constant? It looks easier to handle it here and let the next run of CCP notice the simplified expression. Or do you mean I should add the new function to CCP (or even fold) instead of forwprop? (wouldn't be the first time CCP does more than constant propagation) > If you use fold_build3 you need to check that the result is in expected form > (a is_gimple_invariant or an SSA_NAME). > >> Now that I look at this line, I wonder if I am missing some unshare_expr for >> p and/or op1. > > If either is a CONSTRUCTOR and its def stmt is not removed and it survives > into tem then yes ... But the integer_cst doesn't need it. Ok, thanks.
On Mon, Sep 3, 2012 at 6:12 PM, Marc Glisse <marc.glisse@inria.fr> wrote: > On Mon, 3 Sep 2012, Richard Guenther wrote: > >>>>> + if (code == VEC_PERM_EXPR) >>>>> + { >>>>> + tree p, m, index, tem; >>>>> + unsigned nelts; >>>>> + m = gimple_assign_rhs3 (def_stmt); >>>>> + if (TREE_CODE (m) != VECTOR_CST) >>>>> + return false; >>>>> + nelts = VECTOR_CST_NELTS (m); >>>>> + idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx)); >>>>> + idx %= 2 * nelts; >>>>> + if (idx < nelts) >>>>> + { >>>>> + p = gimple_assign_rhs1 (def_stmt); >>>>> + } >>>>> + else >>>>> + { >>>>> + p = gimple_assign_rhs2 (def_stmt); >>>>> + idx -= nelts; >>>>> + } >>>>> + index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size); >>>>> + tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, >>>>> index); >>>> >>>> >>>> >>>> This shouldn't simplify, so you can use build3 instead. >>> >>> >>> >>> I think that it is possible for p to be a VECTOR_CST, if the shuffle >>> involves one constant and one non-constant vectors, no? >> >> >> Well, constant propagation should have handled it ... > > > When it sees __builtin_shuffle(cst1,var,cst2)[cst3], CCP should basically do > the same thing I am doing here, in the hope that the element will be part of > cst1 instead of var? Yes, if CCP sees vec_1 = VEC_PERM <cst1, var, cst2>; scalar_2 = BIT_FIELD_REF <vec_1, ....cst3>; then if vec_1 ends up being constant it should figure out that vec_1 is constant and that scalar_2 is constant. Of course if we have vec_1 = VEC_PERM <cst1, var1, var2>; and var1/var2 are CONSTRUCTORS with some elements constants then it won't be able to do that and forwprop should do it. So I suppose handling constants in forwprop is fine (but it would be nice to double-check if in the first example CCP figures out that vec_1 and scalar_2 are constant). > What if builtin_shuffle takes 2 constructors, one of > which contains at least one constant? It looks easier to handle it here and > let the next run of CCP notice the simplified expression. Or do you mean I > should add the new function to CCP (or even fold) instead of forwprop? > (wouldn't be the first time CCP does more than constant propagation) CCP should only do lattice-based constant propagation, other cases need to be handled in forwprop. >> If you use fold_build3 you need to check that the result is in expected >> form >> (a is_gimple_invariant or an SSA_NAME). >> >>> Now that I look at this line, I wonder if I am missing some unshare_expr >>> for >>> p and/or op1. >> >> >> If either is a CONSTRUCTOR and its def stmt is not removed and it survives >> into tem then yes ... > > > But the integer_cst doesn't need it. Ok, thanks. > > -- > Marc Glisse
Index: testsuite/gcc.dg/tree-ssa/forwprop-21.c =================================================================== --- testsuite/gcc.dg/tree-ssa/forwprop-21.c (revision 0) +++ testsuite/gcc.dg/tree-ssa/forwprop-21.c (revision 0) @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-optimized" } */ +typedef int v4si __attribute__ ((vector_size (4 * sizeof(int)))); + +int +test (v4si *x, v4si *y) +{ + v4si m = { 2, 3, 6, 5 }; + v4si z = __builtin_shuffle (*x, *y, m); + return z[2]; +} +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "optimized" } } */ +/* { dg-final { cleanup-tree-dump "optimized" } } */ Property changes on: testsuite/gcc.dg/tree-ssa/forwprop-21.c ___________________________________________________________________ Added: svn:eol-style + native Added: svn:keywords + Author Date Id Revision URL Index: tree-ssa-forwprop.c =================================================================== --- tree-ssa-forwprop.c (revision 190847) +++ tree-ssa-forwprop.c (working copy) @@ -2570,20 +2570,88 @@ combine_conversions (gimple_stmt_iterato gimple_assign_set_rhs_code (stmt, CONVERT_EXPR); update_stmt (stmt); return remove_prop_source_from_use (op0) ? 2 : 1; } } } return 0; } +/* Combine an element access with a shuffle. Returns true if there were + any changes made, else it returns false. */ + +static bool +simplify_bitfield (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + gimple def_stmt; + tree op, op0, op1, op2; + unsigned idx, n, size; + enum tree_code code; + + op = gimple_assign_rhs1 (stmt); + gcc_checking_assert (TREE_CODE (op) == BIT_FIELD_REF); + + op0 = TREE_OPERAND (op, 0); + op1 = TREE_OPERAND (op, 1); + op2 = TREE_OPERAND (op, 2); + + if (TREE_CODE (TREE_TYPE (op0)) != VECTOR_TYPE) + return false; + + size = TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (TREE_TYPE (op0)))); + n = TREE_INT_CST_LOW (op1) / size; + idx = TREE_INT_CST_LOW (op2) / size; + + if (n != 1) + return false; + + if (TREE_CODE (op0) != SSA_NAME) + return false; + + def_stmt = SSA_NAME_DEF_STMT (op0); + if (!def_stmt || !is_gimple_assign (def_stmt) + || !can_propagate_from (def_stmt)) + return false; + + code = gimple_assign_rhs_code (def_stmt); + + if (code == VEC_PERM_EXPR) + { + tree p, m, index, tem; + unsigned nelts; + m = gimple_assign_rhs3 (def_stmt); + if (TREE_CODE (m) != VECTOR_CST) + return false; + nelts = VECTOR_CST_NELTS (m); + idx = TREE_INT_CST_LOW (VECTOR_CST_ELT (m, idx)); + idx %= 2 * nelts; + if (idx < nelts) + { + p = gimple_assign_rhs1 (def_stmt); + } + else + { + p = gimple_assign_rhs2 (def_stmt); + idx -= nelts; + } + index = build_int_cst (TREE_TYPE (TREE_TYPE (m)), idx * size); + tem = fold_build3 (BIT_FIELD_REF, TREE_TYPE (op), p, op1, index); + gimple_assign_set_rhs1 (stmt, tem); + update_stmt (stmt); + return true; + } + + return false; +} + /* Determine whether applying the 2 permutations (mask1 then mask2) gives back one of the input. */ static int is_combined_permutation_identity (tree mask1, tree mask2) { tree mask; unsigned int nelts, i, j; bool maybe_identity1 = true; bool maybe_identity2 = true; @@ -2828,20 +2896,22 @@ ssa_forward_propagate_and_combine (void) cfg_changed = true; changed = did_something != 0; } else if (code == VEC_PERM_EXPR) { int did_something = simplify_permutation (&gsi); if (did_something == 2) cfg_changed = true; changed = did_something != 0; } + else if (code == BIT_FIELD_REF) + changed = simplify_bitfield (&gsi); break; } case GIMPLE_SWITCH: changed = simplify_gimple_switch (stmt); break; case GIMPLE_COND: { int did_something;