Message ID | alpine.DEB.2.02.1208181146450.5811@stedding.saclay.inria.fr |
---|---|
State | New |
Headers | show |
On Sat, Aug 18, 2012 at 11:59 AM, Marc Glisse <marc.glisse@inria.fr> wrote: > Hello, > > here is a new patch (passes bootstrap+regtest), which only combines > permutations if the result is the identity. I'll file a PR about more > general > combinations to link to this conversation and the cost hook proposition. > > Ok? Ok. Thanks, Richard. > > 2012-08-18 Marc Glisse <marc.glisse@inria.fr> > > > gcc/ > * fold-const.c (fold_ternary_loc): Detect identity permutations. > Canonicalize permutations more. > * tree-ssa-forwprop.c (is_combined_permutation_identity): New > function. > > (simplify_permutation): Likewise. > (ssa_forward_propagate_and_combine): Call it. > > gcc/testsuite/ > * gcc.dg/tree-ssa/forwprop-19.c: New testcase. > * gcc.dg/fold-perm.c: Likewise. > > -- > Marc Glisse > Index: gcc/fold-const.c > =================================================================== > --- gcc/fold-const.c (revision 190500) > +++ gcc/fold-const.c (working copy) > @@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t > return fold_build2_loc (loc, PLUS_EXPR, type, > const_binop (MULT_EXPR, arg0, arg1), arg2); > if (integer_zerop (arg2)) > return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1); > > return fold_fma (loc, type, arg0, arg1, arg2); > > case VEC_PERM_EXPR: > if (TREE_CODE (arg2) == VECTOR_CST) > { > - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; > + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask; > unsigned char *sel = XALLOCAVEC (unsigned char, nelts); > tree t; > bool need_mask_canon = false; > + bool all_in_vec0 = true; > + bool all_in_vec1 = true; > + bool maybe_identity = true; > + bool single_arg = (op0 == op1); > + bool changed = false; > > + mask = single_arg ? (nelts - 1) : (2 * nelts - 1); > gcc_assert (nelts == VECTOR_CST_NELTS (arg2)); > for (i = 0; i < nelts; i++) > { > tree val = VECTOR_CST_ELT (arg2, i); > if (TREE_CODE (val) != INTEGER_CST) > return NULL_TREE; > > - sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1); > + sel[i] = TREE_INT_CST_LOW (val) & mask; > if (TREE_INT_CST_HIGH (val) > || ((unsigned HOST_WIDE_INT) > TREE_INT_CST_LOW (val) != sel[i])) > need_mask_canon = true; > + > + if (sel[i] < nelts) > + all_in_vec1 = false; > + else > + all_in_vec0 = false; > + > + if ((sel[i] & (nelts-1)) != i) > + maybe_identity = false; > + } > + > + if (maybe_identity) > + { > + if (all_in_vec0) > + return op0; > + if (all_in_vec1) > + return op1; > } > > if ((TREE_CODE (arg0) == VECTOR_CST > || TREE_CODE (arg0) == CONSTRUCTOR) > && (TREE_CODE (arg1) == VECTOR_CST > || TREE_CODE (arg1) == CONSTRUCTOR)) > { > t = fold_vec_perm (type, arg0, arg1, sel); > if (t != NULL_TREE) > return t; > } > > + if (all_in_vec0) > + op1 = op0; > + else if (all_in_vec1) > + { > + op0 = op1; > + for (i = 0; i < nelts; i++) > + sel[i] -= nelts; > + need_mask_canon = true; > + } > + > + if (op0 == op1 && !single_arg) > + changed = true; > + > if (need_mask_canon && arg2 == op2) > { > tree *tsel = XALLOCAVEC (tree, nelts); > tree eltype = TREE_TYPE (TREE_TYPE (arg2)); > for (i = 0; i < nelts; i++) > tsel[i] = build_int_cst (eltype, sel[i]); > - t = build_vector (TREE_TYPE (arg2), tsel); > - return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t); > + op2 = build_vector (TREE_TYPE (arg2), tsel); > + changed = true; > } > + > + if (changed) > + return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2); > } > return NULL_TREE; > > default: > return NULL_TREE; > } /* switch (code) */ > } > > /* Perform constant folding and related simplification of EXPR. > The related simplifications include x*1 => x, x*0 => 0, etc., > Index: gcc/tree-ssa-forwprop.c > =================================================================== > --- gcc/tree-ssa-forwprop.c (revision 190500) > +++ gcc/tree-ssa-forwprop.c (working copy) > @@ -2570,20 +2570,109 @@ combine_conversions (gimple_stmt_iterato > gimple_assign_set_rhs_code (stmt, CONVERT_EXPR); > update_stmt (stmt); > return remove_prop_source_from_use (op0) ? 2 : 1; > } > } > } > > return 0; > } > > +/* Determine whether applying the 2 permutations (mask1 then mask2) > + gives back one of the input. */ > + > +static int > +is_combined_permutation_identity (tree mask1, tree mask2) > +{ > + tree mask; > + unsigned int nelts, i, j; > + bool maybe_identity1 = true; > + bool maybe_identity2 = true; > + > + gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST > + && TREE_CODE (mask2) == VECTOR_CST); > + mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, > mask2); > + gcc_assert (TREE_CODE (mask) == VECTOR_CST); > + > + nelts = VECTOR_CST_NELTS (mask); > + for (i = 0; i < nelts; i++) > + { > + tree val = VECTOR_CST_ELT (mask, i); > + gcc_assert (TREE_CODE (val) == INTEGER_CST); > + j = TREE_INT_CST_LOW (val) & (2 * nelts - 1); > + if (j == i) > + maybe_identity2 = false; > + else if (j == i + nelts) > + maybe_identity1 = false; > + else > + return 0; > + } > + return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0; > +} > + > +/* Combine two shuffles in a row. Returns 1 if there were any changes > + made, 2 if cfg-cleanup needs to run. Else it returns 0. */ > + > +static int > +simplify_permutation (gimple_stmt_iterator *gsi) > +{ > + gimple stmt = gsi_stmt (*gsi); > + gimple def_stmt; > + tree op0, op1, op2, op3; > + enum tree_code code = gimple_assign_rhs_code (stmt); > + enum tree_code code2; > + > + gcc_checking_assert (code == VEC_PERM_EXPR); > + > + op0 = gimple_assign_rhs1 (stmt); > + op1 = gimple_assign_rhs2 (stmt); > + op2 = gimple_assign_rhs3 (stmt); > + > + if (TREE_CODE (op0) != SSA_NAME) > + return 0; > + > + if (TREE_CODE (op2) != VECTOR_CST) > + return 0; > + > + if (op0 != op1) > + return 0; > + > + def_stmt = SSA_NAME_DEF_STMT (op0); > + if (!def_stmt || !is_gimple_assign (def_stmt) > + || !can_propagate_from (def_stmt)) > + return 0; > + > + code2 = gimple_assign_rhs_code (def_stmt); > + > + /* Two consecutive shuffles. */ > + if (code2 == VEC_PERM_EXPR) > + { > + tree orig; > + int ident; > + op3 = gimple_assign_rhs3 (def_stmt); > + if (TREE_CODE (op3) != VECTOR_CST) > + return 0; > + ident = is_combined_permutation_identity (op3, op2); > + if (!ident) > + return 0; > + orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt) > + : gimple_assign_rhs2 (def_stmt); > + gimple_assign_set_rhs1 (stmt, unshare_expr (orig)); > + gimple_assign_set_rhs_code (stmt, TREE_CODE (orig)); > + gimple_set_num_ops (stmt, 2); > + update_stmt (stmt); > + return remove_prop_source_from_use (op0) ? 2 : 1; > + } > + > + return false; > +} > + > /* Main entry point for the forward propagation and statement combine > optimizer. */ > > static unsigned int > ssa_forward_propagate_and_combine (void) > { > basic_block bb; > unsigned int todoflags = 0; > > cfg_changed = false; > @@ -2732,20 +2821,27 @@ ssa_forward_propagate_and_combine (void) > changed = associate_pointerplus (&gsi); > else if (CONVERT_EXPR_CODE_P (code) > || code == FLOAT_EXPR > || code == FIX_TRUNC_EXPR) > { > int did_something = combine_conversions (&gsi); > if (did_something == 2) > cfg_changed = true; > changed = did_something != 0; > } > + else if (code == VEC_PERM_EXPR) > + { > + int did_something = simplify_permutation (&gsi); > + if (did_something == 2) > + cfg_changed = true; > + changed = did_something != 0; > + } > break; > } > > case GIMPLE_SWITCH: > changed = simplify_gimple_switch (stmt); > break; > > case GIMPLE_COND: > { > int did_something; > Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c > =================================================================== > --- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0) > +++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0) > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-forwprop2" } */ > + > +typedef int vec __attribute__((vector_size (4 * sizeof (int)))); > +void f (vec *x1, vec *x2) > +{ > + vec m = { 1, 2, 3, 0 }; > + vec n = { 3, 0, 1, 2 }; > + vec y = __builtin_shuffle (*x1, *x2, n); > + vec z = __builtin_shuffle (y, m); > + *x1 = z; > +} > + > +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "forwprop2" } } */ > +/* { dg-final { cleanup-tree-dump "forwprop2" } } */ > > Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c > ___________________________________________________________________ > Added: svn:keywords > + Author Date Id Revision URL > Added: svn:eol-style > + native > > Index: gcc/testsuite/gcc.dg/fold-perm.c > =================================================================== > --- gcc/testsuite/gcc.dg/fold-perm.c (revision 0) > +++ gcc/testsuite/gcc.dg/fold-perm.c (revision 0) > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O -fdump-tree-ccp1" } */ > + > +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); > + > +void fun (veci *f, veci *g, veci *h) > +{ > + veci m = { 7, 7, 4, 6 }; > + veci n = { 0, 1, 2, 3 }; > + veci p = { 1, 1, 7, 6 }; > + *h = __builtin_shuffle (*h, *h, p); > + *g = __builtin_shuffle (*f, *g, m); > + *f = __builtin_shuffle (*f, *g, n); > +} > + > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 3, 3, 0, 2 }" "ccp1" } } > */ > +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 3, 2 }" "ccp1" } } > */ > +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "ccp1" } } */ > +/* { dg-final { cleanup-tree-dump "ccp1" } } */ > > Property changes on: gcc/testsuite/gcc.dg/fold-perm.c > ___________________________________________________________________ > Added: svn:eol-style > + native > Added: svn:keywords > + Author Date Id Revision URL > >
Index: gcc/fold-const.c =================================================================== --- gcc/fold-const.c (revision 190500) +++ gcc/fold-const.c (working copy) @@ -14148,58 +14148,96 @@ fold_ternary_loc (location_t loc, enum t return fold_build2_loc (loc, PLUS_EXPR, type, const_binop (MULT_EXPR, arg0, arg1), arg2); if (integer_zerop (arg2)) return fold_build2_loc (loc, MULT_EXPR, type, arg0, arg1); return fold_fma (loc, type, arg0, arg1, arg2); case VEC_PERM_EXPR: if (TREE_CODE (arg2) == VECTOR_CST) { - unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i; + unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i, mask; unsigned char *sel = XALLOCAVEC (unsigned char, nelts); tree t; bool need_mask_canon = false; + bool all_in_vec0 = true; + bool all_in_vec1 = true; + bool maybe_identity = true; + bool single_arg = (op0 == op1); + bool changed = false; + mask = single_arg ? (nelts - 1) : (2 * nelts - 1); gcc_assert (nelts == VECTOR_CST_NELTS (arg2)); for (i = 0; i < nelts; i++) { tree val = VECTOR_CST_ELT (arg2, i); if (TREE_CODE (val) != INTEGER_CST) return NULL_TREE; - sel[i] = TREE_INT_CST_LOW (val) & (2 * nelts - 1); + sel[i] = TREE_INT_CST_LOW (val) & mask; if (TREE_INT_CST_HIGH (val) || ((unsigned HOST_WIDE_INT) TREE_INT_CST_LOW (val) != sel[i])) need_mask_canon = true; + + if (sel[i] < nelts) + all_in_vec1 = false; + else + all_in_vec0 = false; + + if ((sel[i] & (nelts-1)) != i) + maybe_identity = false; + } + + if (maybe_identity) + { + if (all_in_vec0) + return op0; + if (all_in_vec1) + return op1; } if ((TREE_CODE (arg0) == VECTOR_CST || TREE_CODE (arg0) == CONSTRUCTOR) && (TREE_CODE (arg1) == VECTOR_CST || TREE_CODE (arg1) == CONSTRUCTOR)) { t = fold_vec_perm (type, arg0, arg1, sel); if (t != NULL_TREE) return t; } + if (all_in_vec0) + op1 = op0; + else if (all_in_vec1) + { + op0 = op1; + for (i = 0; i < nelts; i++) + sel[i] -= nelts; + need_mask_canon = true; + } + + if (op0 == op1 && !single_arg) + changed = true; + if (need_mask_canon && arg2 == op2) { tree *tsel = XALLOCAVEC (tree, nelts); tree eltype = TREE_TYPE (TREE_TYPE (arg2)); for (i = 0; i < nelts; i++) tsel[i] = build_int_cst (eltype, sel[i]); - t = build_vector (TREE_TYPE (arg2), tsel); - return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, t); + op2 = build_vector (TREE_TYPE (arg2), tsel); + changed = true; } + + if (changed) + return build3_loc (loc, VEC_PERM_EXPR, type, op0, op1, op2); } return NULL_TREE; default: return NULL_TREE; } /* switch (code) */ } /* Perform constant folding and related simplification of EXPR. The related simplifications include x*1 => x, x*0 => 0, etc., Index: gcc/tree-ssa-forwprop.c =================================================================== --- gcc/tree-ssa-forwprop.c (revision 190500) +++ gcc/tree-ssa-forwprop.c (working copy) @@ -2570,20 +2570,109 @@ combine_conversions (gimple_stmt_iterato gimple_assign_set_rhs_code (stmt, CONVERT_EXPR); update_stmt (stmt); return remove_prop_source_from_use (op0) ? 2 : 1; } } } return 0; } +/* Determine whether applying the 2 permutations (mask1 then mask2) + gives back one of the input. */ + +static int +is_combined_permutation_identity (tree mask1, tree mask2) +{ + tree mask; + unsigned int nelts, i, j; + bool maybe_identity1 = true; + bool maybe_identity2 = true; + + gcc_checking_assert (TREE_CODE (mask1) == VECTOR_CST + && TREE_CODE (mask2) == VECTOR_CST); + mask = fold_ternary (VEC_PERM_EXPR, TREE_TYPE (mask1), mask1, mask1, mask2); + gcc_assert (TREE_CODE (mask) == VECTOR_CST); + + nelts = VECTOR_CST_NELTS (mask); + for (i = 0; i < nelts; i++) + { + tree val = VECTOR_CST_ELT (mask, i); + gcc_assert (TREE_CODE (val) == INTEGER_CST); + j = TREE_INT_CST_LOW (val) & (2 * nelts - 1); + if (j == i) + maybe_identity2 = false; + else if (j == i + nelts) + maybe_identity1 = false; + else + return 0; + } + return maybe_identity1 ? 1 : maybe_identity2 ? 2 : 0; +} + +/* Combine two shuffles in a row. Returns 1 if there were any changes + made, 2 if cfg-cleanup needs to run. Else it returns 0. */ + +static int +simplify_permutation (gimple_stmt_iterator *gsi) +{ + gimple stmt = gsi_stmt (*gsi); + gimple def_stmt; + tree op0, op1, op2, op3; + enum tree_code code = gimple_assign_rhs_code (stmt); + enum tree_code code2; + + gcc_checking_assert (code == VEC_PERM_EXPR); + + op0 = gimple_assign_rhs1 (stmt); + op1 = gimple_assign_rhs2 (stmt); + op2 = gimple_assign_rhs3 (stmt); + + if (TREE_CODE (op0) != SSA_NAME) + return 0; + + if (TREE_CODE (op2) != VECTOR_CST) + return 0; + + if (op0 != op1) + return 0; + + def_stmt = SSA_NAME_DEF_STMT (op0); + if (!def_stmt || !is_gimple_assign (def_stmt) + || !can_propagate_from (def_stmt)) + return 0; + + code2 = gimple_assign_rhs_code (def_stmt); + + /* Two consecutive shuffles. */ + if (code2 == VEC_PERM_EXPR) + { + tree orig; + int ident; + op3 = gimple_assign_rhs3 (def_stmt); + if (TREE_CODE (op3) != VECTOR_CST) + return 0; + ident = is_combined_permutation_identity (op3, op2); + if (!ident) + return 0; + orig = (ident == 1) ? gimple_assign_rhs1 (def_stmt) + : gimple_assign_rhs2 (def_stmt); + gimple_assign_set_rhs1 (stmt, unshare_expr (orig)); + gimple_assign_set_rhs_code (stmt, TREE_CODE (orig)); + gimple_set_num_ops (stmt, 2); + update_stmt (stmt); + return remove_prop_source_from_use (op0) ? 2 : 1; + } + + return false; +} + /* Main entry point for the forward propagation and statement combine optimizer. */ static unsigned int ssa_forward_propagate_and_combine (void) { basic_block bb; unsigned int todoflags = 0; cfg_changed = false; @@ -2732,20 +2821,27 @@ ssa_forward_propagate_and_combine (void) changed = associate_pointerplus (&gsi); else if (CONVERT_EXPR_CODE_P (code) || code == FLOAT_EXPR || code == FIX_TRUNC_EXPR) { int did_something = combine_conversions (&gsi); if (did_something == 2) cfg_changed = true; changed = did_something != 0; } + else if (code == VEC_PERM_EXPR) + { + int did_something = simplify_permutation (&gsi); + if (did_something == 2) + cfg_changed = true; + changed = did_something != 0; + } break; } case GIMPLE_SWITCH: changed = simplify_gimple_switch (stmt); break; case GIMPLE_COND: { int did_something; Index: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c =================================================================== --- gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0) +++ gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c (revision 0) @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-forwprop2" } */ + +typedef int vec __attribute__((vector_size (4 * sizeof (int)))); +void f (vec *x1, vec *x2) +{ + vec m = { 1, 2, 3, 0 }; + vec n = { 3, 0, 1, 2 }; + vec y = __builtin_shuffle (*x1, *x2, n); + vec z = __builtin_shuffle (y, m); + *x1 = z; +} + +/* { dg-final { scan-tree-dump-not "VEC_PERM_EXPR" "forwprop2" } } */ +/* { dg-final { cleanup-tree-dump "forwprop2" } } */ Property changes on: gcc/testsuite/gcc.dg/tree-ssa/forwprop-19.c ___________________________________________________________________ Added: svn:keywords + Author Date Id Revision URL Added: svn:eol-style + native Index: gcc/testsuite/gcc.dg/fold-perm.c =================================================================== --- gcc/testsuite/gcc.dg/fold-perm.c (revision 0) +++ gcc/testsuite/gcc.dg/fold-perm.c (revision 0) @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-O -fdump-tree-ccp1" } */ + +typedef int veci __attribute__ ((vector_size (4 * sizeof (int)))); + +void fun (veci *f, veci *g, veci *h) +{ + veci m = { 7, 7, 4, 6 }; + veci n = { 0, 1, 2, 3 }; + veci p = { 1, 1, 7, 6 }; + *h = __builtin_shuffle (*h, *h, p); + *g = __builtin_shuffle (*f, *g, m); + *f = __builtin_shuffle (*f, *g, n); +} + +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 3, 3, 0, 2 }" "ccp1" } } */ +/* { dg-final { scan-tree-dump "VEC_PERM_EXPR.*{ 1, 1, 3, 2 }" "ccp1" } } */ +/* { dg-final { scan-tree-dump-times "VEC_PERM_EXPR" 2 "ccp1" } } */ +/* { dg-final { cleanup-tree-dump "ccp1" } } */