Message ID | Pine.LNX.4.64.1009071446530.29722@wotan.suse.de |
---|---|
State | New |
gcc-patches-owner@gcc.gnu.org wrote on 07/09/2010 03:58:56 PM:

> Hi,
>
> this implements handling of consecutive loads whose indices run
> backwards.  I'm handling the easy cases only, namely when no explicit
> realignment instructions need to be generated and when we need to emit
> only one load per original load (i.e. no multiple type width in the
> loop).
>
> This all requires that we be able to reverse the elements of a vector,
> hence two new helper functions for that are added.
>
> Reversed stores aren't handled but should be reasonably easy to extend.
>
> Regstrapped on x86_64-linux.  I've had to add x86_64-*-* to the
> target-supports.exp/vect_perm recognition, which in turn makes
> vect/slp-perm-8.c and vect/slp-perm-9.c fail.  That's most probably
> because while x86_64 supports permutation for 4- and 8-byte elements,
> it doesn't do so for char (slp-perm-8.c) or short (slp-perm-9.c).  I'm
> going to investigate this but seek feedback on the patch itself
> already now.

Looks good to me.

Ira

> Ciao,
> Michael.
> --
> 	PR tree-optimization/43432
> 	* tree-vect-data-refs.c (vect_analyze_data_ref_access):
> 	Accept backwards consecutive accesses.
> 	(vect_create_data_ref_ptr): If step is negative generate
> 	decreasing IVs.
> 	* tree-vect-stmts.c (vectorizable_store): Reject negative steps.
> 	(perm_mask_for_reverse, reverse_vec_elements): New functions.
> 	(vectorizable_load): Handle loads with negative steps when easily
> 	possible.
>
> 	testsuite/
> 	PR tree-optimization/43432
> 	* gcc.dg/vect/pr43432.c: New test.
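As a rough illustration of the transform (not part of the patch): for a
negative-step load the vectorizer emits the vector load at the lowest
address the scalar iterations touch -- hence the offset of
-(TYPE_VECTOR_SUBPARTS - 1) elements set up in vectorizable_load -- and
then reverses the lanes.  A minimal GNU C sketch, assuming 4-wide float
vectors and using __builtin_shuffle (a later GCC feature, standing in
here for the builtin_vec_perm target hook the patch actually calls):

#include <string.h>

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));

/* Return p[0], p[-1], p[-2], p[-3] as one vector: load at the lowest
   address touched, p - (nunits - 1), then reverse the lanes.  */
static v4sf
load_reversed (const float *p)
{
  v4sf x;
  memcpy (&x, p - 3, sizeof x);      /* one (possibly unaligned) vector load */
  const v4si rev = { 3, 2, 1, 0 };   /* lane-reversal permute mask */
  return __builtin_shuffle (x, rev);
}

The patch's perm_mask_for_reverse builds exactly this {nunits-1, ..., 1, 0}
mask and returns NULL when the target cannot permute the given vector type,
in which case vectorizable_load gives up.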
Index: tree-vect-data-refs.c
===================================================================
--- tree-vect-data-refs.c	(revision 163773)
+++ tree-vect-data-refs.c	(working copy)
@@ -2281,7 +2281,9 @@ vect_analyze_data_ref_access (struct dat
     }
 
   /* Consecutive?  */
-  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
+  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
+      || (dr_step < 0
+          && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
     {
       /* Mark that it is not interleaving.  */
       DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
@@ -2953,6 +2955,7 @@ vect_create_data_ref_ptr (gimple stmt, s
   tree vptr;
   gimple_stmt_iterator incr_gsi;
   bool insert_after;
+  bool negative;
   tree indx_before_incr, indx_after_incr;
   gimple incr;
   tree step;
@@ -2985,6 +2988,7 @@ vect_create_data_ref_ptr (gimple stmt, s
     *inv_p = true;
   else
     *inv_p = false;
+  negative = tree_int_cst_compare (step, size_zero_node) < 0;
 
   /* Create an expression for the first address accessed by this load
      in LOOP.  */
@@ -3144,6 +3148,8 @@ vect_create_data_ref_ptr (gimple stmt, s
      LOOP is zero.  In this case the step here is also zero.  */
   if (*inv_p)
     step = size_zero_node;
+  else if (negative)
+    step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
 
   standard_iv_increment_position (loop, &incr_gsi, &insert_after);
 
Index: tree-vect-stmts.c
===================================================================
--- tree-vect-stmts.c	(revision 163773)
+++ tree-vect-stmts.c	(working copy)
@@ -3134,6 +3134,13 @@ vectorizable_store (gimple stmt, gimple_
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "negative step for store.");
+      return false;
+    }
+
   if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
     {
       strided_store = true;
@@ -3400,6 +3407,68 @@ vectorizable_store (gimple stmt, gimple_
   return true;
 }
 
+/* Given a vector type VECTYPE returns a builtin DECL to be used
+   for vector permutation and stores a mask into *MASK that implements
+   reversal of the vector elements.  If that is impossible to do
+   returns NULL (and *MASK is unchanged).  */
+
+static tree
+perm_mask_for_reverse (tree vectype, tree *mask)
+{
+  tree builtin_decl;
+  tree mask_element_type, mask_type;
+  tree mask_vec = NULL;
+  int i;
+  int nunits;
+  if (!targetm.vectorize.builtin_vec_perm)
+    return NULL;
+
+  builtin_decl = targetm.vectorize.builtin_vec_perm (vectype,
+                                                     &mask_element_type);
+  if (!builtin_decl || !mask_element_type)
+    return NULL;
+
+  mask_type = get_vectype_for_scalar_type (mask_element_type);
+  nunits = TYPE_VECTOR_SUBPARTS (vectype);
+  if (TYPE_VECTOR_SUBPARTS (vectype) != TYPE_VECTOR_SUBPARTS (mask_type))
+    return NULL;
+
+  for (i = 0; i < nunits; i++)
+    mask_vec = tree_cons (NULL, build_int_cst (mask_element_type, i), mask_vec);
+  mask_vec = build_vector (mask_type, mask_vec);
+
+  if (!targetm.vectorize.builtin_vec_perm_ok (vectype, mask_vec))
+    return NULL;
+  if (mask)
+    *mask = mask_vec;
+  return builtin_decl;
+}
+
+/* Given a vector variable X, that was generated for the scalar LHS of
+   STMT, generate instructions to reverse the vector elements of X,
+   insert them at *GSI and return the permuted vector variable.  */
+
+static tree
+reverse_vec_elements (tree x, gimple stmt, gimple_stmt_iterator *gsi)
+{
+  tree vectype = TREE_TYPE (x);
+  tree mask_vec, builtin_decl;
+  tree perm_dest, data_ref;
+  gimple perm_stmt;
+
+  builtin_decl = perm_mask_for_reverse (vectype, &mask_vec);
+
+  perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
+
+  /* Generate the permute statement.  */
+  perm_stmt = gimple_build_call (builtin_decl, 3, x, x, mask_vec);
+  data_ref = make_ssa_name (perm_dest, perm_stmt);
+  gimple_call_set_lhs (perm_stmt, data_ref);
+  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+
+  return data_ref;
+}
+
 /* vectorizable_load.
 
    Check if STMT reads a non scalar data-ref (array/pointer/structure) that
@@ -3442,6 +3511,7 @@ vectorizable_load (gimple stmt, gimple_s
   gimple first_stmt;
   tree scalar_type;
   bool inv_p;
+  bool negative;
   bool compute_in_loop = false;
   struct loop *at_loop;
   int vec_num;
@@ -3504,6 +3574,14 @@ vectorizable_load (gimple stmt, gimple_s
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
+  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  if (negative && ncopies > 1)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "multiple types with negative step.");
+      return false;
+    }
+
   scalar_type = TREE_TYPE (DR_REF (dr));
   mode = TYPE_MODE (vectype);
 
@@ -3538,6 +3616,25 @@ vectorizable_load (gimple stmt, gimple_s
       return false;
     }
 
+  if (negative)
+    {
+      gcc_assert (!strided_load);
+      alignment_support_scheme = vect_supportable_dr_alignment (dr, false);
+      if (alignment_support_scheme != dr_aligned
+          && alignment_support_scheme != dr_unaligned_supported)
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step but alignment required.");
+          return false;
+        }
+      if (!perm_mask_for_reverse (vectype, NULL))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "negative step and reversing not supported.");
+          return false;
+        }
+    }
+
   if (!vec_stmt) /* transformation not required.  */
     {
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -3712,6 +3809,9 @@ vectorizable_load (gimple stmt, gimple_s
   else
     at_loop = loop;
 
+  if (negative)
+    offset = size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1);
+
   prev_stmt_info = NULL;
   for (j = 0; j < ncopies; j++)
     {
@@ -3885,6 +3985,12 @@ vectorizable_load (gimple stmt, gimple_s
          gcc_unreachable (); /* FORNOW. */
        }
 
+      if (negative)
+        {
+          new_temp = reverse_vec_elements (new_temp, stmt, gsi);
+          new_stmt = SSA_NAME_DEF_STMT (new_temp);
+        }
+
       /* Collect vector loads and later create their permutation in
          vect_transform_strided_load ().  */
       if (strided_load || slp_perm)
Index: testsuite/gcc.dg/vect/pr43432.c
===================================================================
--- testsuite/gcc.dg/vect/pr43432.c	(revision 0)
+++ testsuite/gcc.dg/vect/pr43432.c	(revision 0)
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_float } */
+/* { dg-options "-O3 -ffast-math -fdump-tree-vect-details" } */
+
+
+void vector_fmul_reverse_c(float *dst, const float *src0, const float *src1,
+int len){
+  int i;
+  src1 += len-1;
+  for(i=0; i<len; i++)
+    dst[i] = src0[i] * src1[-i];
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm } } } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
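For concreteness, here is a hand-written sketch of the shape the vectorized
testcase loop takes under the conditions the patch checks for (4-wide float
vectors, aligned or supported-unaligned loads, a target permute).  The
function name is invented, and __builtin_shuffle again stands in for the
builtin_vec_perm hook; this is only an illustration, not what GCC literally
emits:

#include <string.h>

typedef float v4sf __attribute__ ((vector_size (16)));
typedef int v4si __attribute__ ((vector_size (16)));

void
vector_fmul_reverse_vec (float *dst, const float *src0,
                         const float *src1, int len)
{
  const v4si rev = { 3, 2, 1, 0 };   /* lane-reversal permute mask */
  int i;

  src1 += len - 1;
  for (i = 0; i + 4 <= len; i += 4)
    {
      v4sf a, b, c;
      memcpy (&a, src0 + i, sizeof a);
      /* Decreasing IV on src1: the load starts nunits - 1 elements below
         the first scalar address of this iteration, then is reversed.  */
      memcpy (&b, src1 - i - 3, sizeof b);
      b = __builtin_shuffle (b, rev);
      c = a * b;
      memcpy (dst + i, &c, sizeof c);
    }
  for (; i < len; i++)               /* scalar epilogue */
    dst[i] = src0[i] * src1[-i];
}

Note that the testcase only scans for "vectorized 1 loops" on vect_perm
targets, matching the perm_mask_for_reverse requirement above.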