Message ID | 20230508015900.3988239-1-hongtao.liu@intel.com |
---|---|
State | New |
Headers | show |
Series | [V2,vect] Enhance NARROW FLOAT_EXPR vectorization by truncating integer to lower precision. | expand |
ping. On Mon, May 8, 2023 at 9:59 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > @@ -4799,7 +4800,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > > > stmt_vec_info stmt_info, > > > vec<tree> &vec_dsts, > > > gimple_stmt_iterator *gsi, > > > - slp_tree slp_node, enum tree_code code) > > > + slp_tree slp_node, enum tree_code code, > > > + bool last_stmt_p) > > > > Can you please document this new parameter? > > > Changed. > > > > > I understand what you are doing, but somehow it looks a bit awkward? > > Maybe we should split the NARROW case into NARROW_SRC and NARROW_DST? > > The case of narrowing the source because we know its range isn't a > > good fit for the > > flow. > Changed. > > Here's the updated patch. > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > Ok for trunk? > > Similar to WIDEN FLOAT_EXPR, when the direct_optab does not exist, try an > intermediate integer type whenever gimple ranger can tell it's safe. > > i.e. > When there's no direct optab for vector long long -> vector float, but > the value range of the integer can be represented as int, try vector int > -> vector float if available. > > gcc/ChangeLog: > > PR tree-optimization/108804 > * tree-vect-patterns.cc (vect_get_range_info): Remove static. > * tree-vect-stmts.cc (vect_create_vectorized_demotion_stmts): > Add new parameter narrow_src_p. > (vectorizable_conversion): Enhance NARROW FLOAT_EXPR > vectorization by truncating to lower precision. > * tree-vectorizer.h (vect_get_range_info): New declare. > > gcc/testsuite/ChangeLog: > > * gcc.target/i386/pr108804.c: New test. 
> --- > gcc/testsuite/gcc.target/i386/pr108804.c | 15 +++ > gcc/tree-vect-patterns.cc | 2 +- > gcc/tree-vect-stmts.cc | 135 +++++++++++++++++------ > gcc/tree-vectorizer.h | 1 + > 4 files changed, 121 insertions(+), 32 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr108804.c > > diff --git a/gcc/testsuite/gcc.target/i386/pr108804.c b/gcc/testsuite/gcc.target/i386/pr108804.c > new file mode 100644 > index 00000000000..2a43c1e1848 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr108804.c > @@ -0,0 +1,15 @@ > +/* { dg-do compile } */ > +/* { dg-options "-mavx2 -Ofast -fdump-tree-vect-details" } */ > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 1 "vect" } } */ > + > +typedef unsigned long long uint64_t; > +uint64_t d[512]; > +float f[1024]; > + > +void foo() { > + for (int i=0; i<512; ++i) { > + uint64_t k = d[i]; > + f[i]=(k & 0x3F30FFFF); > + } > +} > + > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > index a49b0953977..dd546b488a4 100644 > --- a/gcc/tree-vect-patterns.cc > +++ b/gcc/tree-vect-patterns.cc > @@ -61,7 +61,7 @@ along with GCC; see the file COPYING3. If not see > /* Return true if we have a useful VR_RANGE range for VAR, storing it > in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */ > > -static bool > +bool > vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value) > { > value_range vr; > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > index 6b7dbfd4a23..3da89a8402d 100644 > --- a/gcc/tree-vect-stmts.cc > +++ b/gcc/tree-vect-stmts.cc > @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. 
If not see > #include "internal-fn.h" > #include "tree-vector-builder.h" > #include "vec-perm-indices.h" > +#include "gimple-range.h" > #include "tree-ssa-loop-niter.h" > #include "gimple-fold.h" > #include "regs.h" > @@ -4791,7 +4792,9 @@ vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code, > > /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. > For multi-step conversions store the resulting vectors and call the function > - recursively. */ > + recursively. When NARROW_SRC_P is true, there's still a conversion after > + narrowing, don't store the vectors in the SLP_NODE or in vector info of > + the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */ > > static void > vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > @@ -4799,7 +4802,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > stmt_vec_info stmt_info, > vec<tree> &vec_dsts, > gimple_stmt_iterator *gsi, > - slp_tree slp_node, enum tree_code code) > + slp_tree slp_node, enum tree_code code, > + bool narrow_src_p) > { > unsigned int i; > tree vop0, vop1, new_tmp, vec_dest; > @@ -4815,9 +4819,9 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > new_tmp = make_ssa_name (vec_dest, new_stmt); > gimple_assign_set_lhs (new_stmt, new_tmp); > vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > - > - if (multi_step_cvt) > - /* Store the resulting vector for next recursive call. */ > + if (multi_step_cvt || narrow_src_p) > + /* Store the resulting vector for next recursive call, > + or return the resulting vector_tmp for NARROW FLOAT_EXPR. 
*/ > (*vec_oprnds)[i/2] = new_tmp; > else > { > @@ -4843,7 +4847,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds, > multi_step_cvt - 1, > stmt_info, vec_dsts, gsi, > - slp_node, VEC_PACK_TRUNC_EXPR); > + slp_node, VEC_PACK_TRUNC_EXPR, > + narrow_src_p); > } > > vec_dsts.quick_push (vec_dest); > @@ -4988,7 +4993,15 @@ vectorizable_conversion (vec_info *vinfo, > tree vectype_out, vectype_in; > int ncopies, i; > tree lhs_type, rhs_type; > - enum { NARROW, NONE, WIDEN } modifier; > + /* For conversions between floating point and integer, there're 2 NARROW > + cases. NARROW_SRC is for FLOAT_EXPR, means > + integer --DEMOTION--> integer --FLOAT_EXPR--> floating point. > + This is safe when the range of the source integer can fit into the lower > + precision. NARROW_DST is for FIX_TRUNC_EXPR, means > + floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER. > + For other conversions, when there's narrowing, NARROW_DST is used as > + default. */ > + enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier; > vec<tree> vec_oprnds0 = vNULL; > vec<tree> vec_oprnds1 = vNULL; > tree vop0; > @@ -5123,7 +5136,7 @@ vectorizable_conversion (vec_info *vinfo, > else > modifier = NONE; > else if (multiple_p (nunits_out, nunits_in)) > - modifier = NARROW; > + modifier = NARROW_DST; > else > { > gcc_checking_assert (multiple_p (nunits_in, nunits_out)); > @@ -5135,7 +5148,7 @@ vectorizable_conversion (vec_info *vinfo, > case of SLP. 
*/ > if (slp_node) > ncopies = 1; > - else if (modifier == NARROW) > + else if (modifier == NARROW_DST) > ncopies = vect_get_num_copies (loop_vinfo, vectype_out); > else > ncopies = vect_get_num_copies (loop_vinfo, vectype_in); > @@ -5241,29 +5254,63 @@ vectorizable_conversion (vec_info *vinfo, > } > break; > > - case NARROW: > + case NARROW_DST: > gcc_assert (op_type == unary_op); > if (supportable_narrowing_operation (code, vectype_out, vectype_in, > &code1, &multi_step_cvt, > &interm_types)) > break; > > - if (code != FIX_TRUNC_EXPR > - || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) > + if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) > goto unsupported; > > - cvt_type > - = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); > - cvt_type = get_same_sized_vectype (cvt_type, vectype_in); > - if (cvt_type == NULL_TREE) > - goto unsupported; > - if (!supportable_convert_operation (code, cvt_type, vectype_in, > - &codecvt1)) > - goto unsupported; > - if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, > - &code1, &multi_step_cvt, > - &interm_types)) > - break; > + if (code == FIX_TRUNC_EXPR) > + { > + cvt_type > + = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); > + cvt_type = get_same_sized_vectype (cvt_type, vectype_in); > + if (cvt_type == NULL_TREE) > + goto unsupported; > + if (!supportable_convert_operation (code, cvt_type, vectype_in, > + &codecvt1)) > + goto unsupported; > + if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, > + &code1, &multi_step_cvt, > + &interm_types)) > + break; > + } > + /* If op0 can be represented with low precision integer, > + truncate it to cvt_type and the do FLOAT_EXPR. 
*/ > + else if (code == FLOAT_EXPR) > + { > + wide_int op_min_value, op_max_value; > + if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) > + goto unsupported; > + > + cvt_type > + = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0); > + if (cvt_type == NULL_TREE > + || (wi::min_precision (op_max_value, SIGNED) > + > TYPE_PRECISION (cvt_type)) > + || (wi::min_precision (op_min_value, SIGNED) > + > TYPE_PRECISION (cvt_type))) > + goto unsupported; > + > + cvt_type = get_same_sized_vectype (cvt_type, vectype_out); > + if (cvt_type == NULL_TREE) > + goto unsupported; > + if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in, > + &code1, &multi_step_cvt, > + &interm_types)) > + goto unsupported; > + if (supportable_convert_operation (code, vectype_out, > + cvt_type, &codecvt1)) > + { > + modifier = NARROW_SRC; > + break; > + } > + } > + > goto unsupported; > > default: > @@ -5288,7 +5335,7 @@ vectorizable_conversion (vec_info *vinfo, > vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node, > cost_vec); > } > - else if (modifier == NARROW) > + else if (modifier == NARROW_SRC || modifier == NARROW_DST) > { > STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; > /* The final packing step produces one vector result per copy. */ > @@ -5335,8 +5382,10 @@ vectorizable_conversion (vec_info *vinfo, > from supportable_*_operation, and store them in the correct order > for future use in vect_create_vectorized_*_stmts (). */ > auto_vec<tree> vec_dsts (multi_step_cvt + 1); > + bool widen_or_narrow_float_p > + = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC); > vec_dest = vect_create_destination_var (scalar_dest, > - (cvt_type && modifier == WIDEN) > + widen_or_narrow_float_p > ? 
cvt_type : vectype_out); > vec_dsts.quick_push (vec_dest); > > @@ -5353,7 +5402,7 @@ vectorizable_conversion (vec_info *vinfo, > > if (cvt_type) > vec_dest = vect_create_destination_var (scalar_dest, > - modifier == WIDEN > + widen_or_narrow_float_p > ? vectype_out : cvt_type); > > int ninputs = 1; > @@ -5361,7 +5410,7 @@ vectorizable_conversion (vec_info *vinfo, > { > if (modifier == WIDEN) > ; > - else if (modifier == NARROW) > + else if (modifier == NARROW_SRC || modifier == NARROW_DST) > { > if (multi_step_cvt) > ninputs = vect_pow2 (multi_step_cvt); > @@ -5448,7 +5497,8 @@ vectorizable_conversion (vec_info *vinfo, > } > break; > > - case NARROW: > + case NARROW_SRC: > + case NARROW_DST: > /* In case the vectorization factor (VF) is bigger than the number > of elements that we can fit in a vectype (nunits), we have to > generate more than one vector stmt - i.e - we need to "unroll" > @@ -5456,7 +5506,7 @@ vectorizable_conversion (vec_info *vinfo, > vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs, > op0, &vec_oprnds0); > /* Arguments are ready. Create the new vector stmts. */ > - if (cvt_type) > + if (cvt_type && modifier == NARROW_DST) > FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) > { > gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); > @@ -5470,7 +5520,30 @@ vectorizable_conversion (vec_info *vinfo, > vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0, > multi_step_cvt, > stmt_info, vec_dsts, gsi, > - slp_node, code1); > + slp_node, code1, > + modifier == NARROW_SRC); > + /* After demoting op0 to cvt_type, convert it to dest. */ > + if (cvt_type && code == FLOAT_EXPR) > + { > + for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++) > + { > + /* Arguments are ready, create the new vector stmt. 
*/ > + gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); > + gassign *new_stmt > + = gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]); > + new_temp = make_ssa_name (vec_dest, new_stmt); > + gimple_assign_set_lhs (new_stmt, new_temp); > + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > + > + /* This is the last step of the conversion sequence. Store the > + vectors in SLP_NODE or in vector info of the scalar statement > + (or in STMT_VINFO_RELATED_STMT chain). */ > + if (slp_node) > + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); > + else > + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); > + } > + } > break; > } > if (!slp_node) > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index 9cf2fb23fe3..27fa2306a5e 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2384,6 +2384,7 @@ extern bool compatible_calls_p (gcall *, gcall *); > /* In tree-vect-patterns.cc. */ > extern void > vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree); > +extern bool vect_get_range_info (tree, wide_int*, wide_int*); > > /* Pattern recognition functions. > Additional pattern recognition functions can (and will) be added > -- > 2.39.1.388.g2fc9e9ca3c >
On Mon, May 29, 2023 at 5:21 AM Hongtao Liu via Gcc-patches <gcc-patches@gcc.gnu.org> wrote: > > ping. > > On Mon, May 8, 2023 at 9:59 AM liuhongt <hongtao.liu@intel.com> wrote: > > > > > > @@ -4799,7 +4800,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > > > > stmt_vec_info stmt_info, > > > > vec<tree> &vec_dsts, > > > > gimple_stmt_iterator *gsi, > > > > - slp_tree slp_node, enum tree_code code) > > > > + slp_tree slp_node, enum tree_code code, > > > > + bool last_stmt_p) > > > > > > Can you please document this new parameter? > > > > > Changed. > > > > > > > > I understand what you are doing, but somehow it looks a bit awkward? > > > Maybe we should split the NARROW case into NARROW_SRC and NARROW_DST? > > > The case of narrowing the source because we know its range isn't a > > > good fit for the > > > flow. > > Changed. > > > > Here's the updated patch. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}. > > Ok for trunk? OK, sorry for the delay. Thanks, Richard. > > Similar to WIDEN FLOAT_EXPR, when the direct_optab does not exist, try an > > intermediate integer type whenever gimple ranger can tell it's safe. > > > > i.e. > > When there's no direct optab for vector long long -> vector float, but > > the value range of the integer can be represented as int, try vector int > > -> vector float if available. > > > > gcc/ChangeLog: > > > > PR tree-optimization/108804 > > * tree-vect-patterns.cc (vect_get_range_info): Remove static. > > * tree-vect-stmts.cc (vect_create_vectorized_demotion_stmts): > > Add new parameter narrow_src_p. > > (vectorizable_conversion): Enhance NARROW FLOAT_EXPR > > vectorization by truncating to lower precision. > > * tree-vectorizer.h (vect_get_range_info): New declare. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/i386/pr108804.c: New test. 
> > --- > > gcc/testsuite/gcc.target/i386/pr108804.c | 15 +++ > > gcc/tree-vect-patterns.cc | 2 +- > > gcc/tree-vect-stmts.cc | 135 +++++++++++++++++------ > > gcc/tree-vectorizer.h | 1 + > > 4 files changed, 121 insertions(+), 32 deletions(-) > > create mode 100644 gcc/testsuite/gcc.target/i386/pr108804.c > > > > diff --git a/gcc/testsuite/gcc.target/i386/pr108804.c b/gcc/testsuite/gcc.target/i386/pr108804.c > > new file mode 100644 > > index 00000000000..2a43c1e1848 > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/i386/pr108804.c > > @@ -0,0 +1,15 @@ > > +/* { dg-do compile } */ > > +/* { dg-options "-mavx2 -Ofast -fdump-tree-vect-details" } */ > > +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 1 "vect" } } */ > > + > > +typedef unsigned long long uint64_t; > > +uint64_t d[512]; > > +float f[1024]; > > + > > +void foo() { > > + for (int i=0; i<512; ++i) { > > + uint64_t k = d[i]; > > + f[i]=(k & 0x3F30FFFF); > > + } > > +} > > + > > diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc > > index a49b0953977..dd546b488a4 100644 > > --- a/gcc/tree-vect-patterns.cc > > +++ b/gcc/tree-vect-patterns.cc > > @@ -61,7 +61,7 @@ along with GCC; see the file COPYING3. If not see > > /* Return true if we have a useful VR_RANGE range for VAR, storing it > > in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */ > > > > -static bool > > +bool > > vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value) > > { > > value_range vr; > > diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc > > index 6b7dbfd4a23..3da89a8402d 100644 > > --- a/gcc/tree-vect-stmts.cc > > +++ b/gcc/tree-vect-stmts.cc > > @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. 
If not see > > #include "internal-fn.h" > > #include "tree-vector-builder.h" > > #include "vec-perm-indices.h" > > +#include "gimple-range.h" > > #include "tree-ssa-loop-niter.h" > > #include "gimple-fold.h" > > #include "regs.h" > > @@ -4791,7 +4792,9 @@ vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code, > > > > /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. > > For multi-step conversions store the resulting vectors and call the function > > - recursively. */ > > + recursively. When NARROW_SRC_P is true, there's still a conversion after > > + narrowing, don't store the vectors in the SLP_NODE or in vector info of > > + the scalar statement(or in STMT_VINFO_RELATED_STMT chain). */ > > > > static void > > vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > > @@ -4799,7 +4802,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > > stmt_vec_info stmt_info, > > vec<tree> &vec_dsts, > > gimple_stmt_iterator *gsi, > > - slp_tree slp_node, enum tree_code code) > > + slp_tree slp_node, enum tree_code code, > > + bool narrow_src_p) > > { > > unsigned int i; > > tree vop0, vop1, new_tmp, vec_dest; > > @@ -4815,9 +4819,9 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > > new_tmp = make_ssa_name (vec_dest, new_stmt); > > gimple_assign_set_lhs (new_stmt, new_tmp); > > vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > > - > > - if (multi_step_cvt) > > - /* Store the resulting vector for next recursive call. */ > > + if (multi_step_cvt || narrow_src_p) > > + /* Store the resulting vector for next recursive call, > > + or return the resulting vector_tmp for NARROW FLOAT_EXPR. 
*/ > > (*vec_oprnds)[i/2] = new_tmp; > > else > > { > > @@ -4843,7 +4847,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, > > vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds, > > multi_step_cvt - 1, > > stmt_info, vec_dsts, gsi, > > - slp_node, VEC_PACK_TRUNC_EXPR); > > + slp_node, VEC_PACK_TRUNC_EXPR, > > + narrow_src_p); > > } > > > > vec_dsts.quick_push (vec_dest); > > @@ -4988,7 +4993,15 @@ vectorizable_conversion (vec_info *vinfo, > > tree vectype_out, vectype_in; > > int ncopies, i; > > tree lhs_type, rhs_type; > > - enum { NARROW, NONE, WIDEN } modifier; > > + /* For conversions between floating point and integer, there're 2 NARROW > > + cases. NARROW_SRC is for FLOAT_EXPR, means > > + integer --DEMOTION--> integer --FLOAT_EXPR--> floating point. > > + This is safe when the range of the source integer can fit into the lower > > + precision. NARROW_DST is for FIX_TRUNC_EXPR, means > > + floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER. > > + For other conversions, when there's narrowing, NARROW_DST is used as > > + default. */ > > + enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier; > > vec<tree> vec_oprnds0 = vNULL; > > vec<tree> vec_oprnds1 = vNULL; > > tree vop0; > > @@ -5123,7 +5136,7 @@ vectorizable_conversion (vec_info *vinfo, > > else > > modifier = NONE; > > else if (multiple_p (nunits_out, nunits_in)) > > - modifier = NARROW; > > + modifier = NARROW_DST; > > else > > { > > gcc_checking_assert (multiple_p (nunits_in, nunits_out)); > > @@ -5135,7 +5148,7 @@ vectorizable_conversion (vec_info *vinfo, > > case of SLP. 
*/ > > if (slp_node) > > ncopies = 1; > > - else if (modifier == NARROW) > > + else if (modifier == NARROW_DST) > > ncopies = vect_get_num_copies (loop_vinfo, vectype_out); > > else > > ncopies = vect_get_num_copies (loop_vinfo, vectype_in); > > @@ -5241,29 +5254,63 @@ vectorizable_conversion (vec_info *vinfo, > > } > > break; > > > > - case NARROW: > > + case NARROW_DST: > > gcc_assert (op_type == unary_op); > > if (supportable_narrowing_operation (code, vectype_out, vectype_in, > > &code1, &multi_step_cvt, > > &interm_types)) > > break; > > > > - if (code != FIX_TRUNC_EXPR > > - || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) > > + if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) > > goto unsupported; > > > > - cvt_type > > - = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); > > - cvt_type = get_same_sized_vectype (cvt_type, vectype_in); > > - if (cvt_type == NULL_TREE) > > - goto unsupported; > > - if (!supportable_convert_operation (code, cvt_type, vectype_in, > > - &codecvt1)) > > - goto unsupported; > > - if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, > > - &code1, &multi_step_cvt, > > - &interm_types)) > > - break; > > + if (code == FIX_TRUNC_EXPR) > > + { > > + cvt_type > > + = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); > > + cvt_type = get_same_sized_vectype (cvt_type, vectype_in); > > + if (cvt_type == NULL_TREE) > > + goto unsupported; > > + if (!supportable_convert_operation (code, cvt_type, vectype_in, > > + &codecvt1)) > > + goto unsupported; > > + if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, > > + &code1, &multi_step_cvt, > > + &interm_types)) > > + break; > > + } > > + /* If op0 can be represented with low precision integer, > > + truncate it to cvt_type and the do FLOAT_EXPR. 
*/ > > + else if (code == FLOAT_EXPR) > > + { > > + wide_int op_min_value, op_max_value; > > + if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) > > + goto unsupported; > > + > > + cvt_type > > + = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0); > > + if (cvt_type == NULL_TREE > > + || (wi::min_precision (op_max_value, SIGNED) > > + > TYPE_PRECISION (cvt_type)) > > + || (wi::min_precision (op_min_value, SIGNED) > > + > TYPE_PRECISION (cvt_type))) > > + goto unsupported; > > + > > + cvt_type = get_same_sized_vectype (cvt_type, vectype_out); > > + if (cvt_type == NULL_TREE) > > + goto unsupported; > > + if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in, > > + &code1, &multi_step_cvt, > > + &interm_types)) > > + goto unsupported; > > + if (supportable_convert_operation (code, vectype_out, > > + cvt_type, &codecvt1)) > > + { > > + modifier = NARROW_SRC; > > + break; > > + } > > + } > > + > > goto unsupported; > > > > default: > > @@ -5288,7 +5335,7 @@ vectorizable_conversion (vec_info *vinfo, > > vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node, > > cost_vec); > > } > > - else if (modifier == NARROW) > > + else if (modifier == NARROW_SRC || modifier == NARROW_DST) > > { > > STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; > > /* The final packing step produces one vector result per copy. */ > > @@ -5335,8 +5382,10 @@ vectorizable_conversion (vec_info *vinfo, > > from supportable_*_operation, and store them in the correct order > > for future use in vect_create_vectorized_*_stmts (). */ > > auto_vec<tree> vec_dsts (multi_step_cvt + 1); > > + bool widen_or_narrow_float_p > > + = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC); > > vec_dest = vect_create_destination_var (scalar_dest, > > - (cvt_type && modifier == WIDEN) > > + widen_or_narrow_float_p > > ? 
cvt_type : vectype_out); > > vec_dsts.quick_push (vec_dest); > > > > @@ -5353,7 +5402,7 @@ vectorizable_conversion (vec_info *vinfo, > > > > if (cvt_type) > > vec_dest = vect_create_destination_var (scalar_dest, > > - modifier == WIDEN > > + widen_or_narrow_float_p > > ? vectype_out : cvt_type); > > > > int ninputs = 1; > > @@ -5361,7 +5410,7 @@ vectorizable_conversion (vec_info *vinfo, > > { > > if (modifier == WIDEN) > > ; > > - else if (modifier == NARROW) > > + else if (modifier == NARROW_SRC || modifier == NARROW_DST) > > { > > if (multi_step_cvt) > > ninputs = vect_pow2 (multi_step_cvt); > > @@ -5448,7 +5497,8 @@ vectorizable_conversion (vec_info *vinfo, > > } > > break; > > > > - case NARROW: > > + case NARROW_SRC: > > + case NARROW_DST: > > /* In case the vectorization factor (VF) is bigger than the number > > of elements that we can fit in a vectype (nunits), we have to > > generate more than one vector stmt - i.e - we need to "unroll" > > @@ -5456,7 +5506,7 @@ vectorizable_conversion (vec_info *vinfo, > > vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs, > > op0, &vec_oprnds0); > > /* Arguments are ready. Create the new vector stmts. */ > > - if (cvt_type) > > + if (cvt_type && modifier == NARROW_DST) > > FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) > > { > > gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); > > @@ -5470,7 +5520,30 @@ vectorizable_conversion (vec_info *vinfo, > > vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0, > > multi_step_cvt, > > stmt_info, vec_dsts, gsi, > > - slp_node, code1); > > + slp_node, code1, > > + modifier == NARROW_SRC); > > + /* After demoting op0 to cvt_type, convert it to dest. */ > > + if (cvt_type && code == FLOAT_EXPR) > > + { > > + for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++) > > + { > > + /* Arguments are ready, create the new vector stmt. 
*/ > > + gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); > > + gassign *new_stmt > > + = gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]); > > + new_temp = make_ssa_name (vec_dest, new_stmt); > > + gimple_assign_set_lhs (new_stmt, new_temp); > > + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); > > + > > + /* This is the last step of the conversion sequence. Store the > > + vectors in SLP_NODE or in vector info of the scalar statement > > + (or in STMT_VINFO_RELATED_STMT chain). */ > > + if (slp_node) > > + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); > > + else > > + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); > > + } > > + } > > break; > > } > > if (!slp_node) > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > > index 9cf2fb23fe3..27fa2306a5e 100644 > > --- a/gcc/tree-vectorizer.h > > +++ b/gcc/tree-vectorizer.h > > @@ -2384,6 +2384,7 @@ extern bool compatible_calls_p (gcall *, gcall *); > > /* In tree-vect-patterns.cc. */ > > extern void > > vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree); > > +extern bool vect_get_range_info (tree, wide_int*, wide_int*); > > > > /* Pattern recognition functions. > > Additional pattern recognition functions can (and will) be added > > -- > > 2.39.1.388.g2fc9e9ca3c > > > > > -- > BR, > Hongtao
diff --git a/gcc/testsuite/gcc.target/i386/pr108804.c b/gcc/testsuite/gcc.target/i386/pr108804.c new file mode 100644 index 00000000000..2a43c1e1848 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr108804.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx2 -Ofast -fdump-tree-vect-details" } */ +/* { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 1 "vect" } } */ + +typedef unsigned long long uint64_t; +uint64_t d[512]; +float f[1024]; + +void foo() { + for (int i=0; i<512; ++i) { + uint64_t k = d[i]; + f[i]=(k & 0x3F30FFFF); + } +} + diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc index a49b0953977..dd546b488a4 100644 --- a/gcc/tree-vect-patterns.cc +++ b/gcc/tree-vect-patterns.cc @@ -61,7 +61,7 @@ along with GCC; see the file COPYING3. If not see /* Return true if we have a useful VR_RANGE range for VAR, storing it in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */ -static bool +bool vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value) { value_range vr; diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 6b7dbfd4a23..3da89a8402d 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -51,6 +51,7 @@ along with GCC; see the file COPYING3. If not see #include "internal-fn.h" #include "tree-vector-builder.h" #include "vec-perm-indices.h" +#include "gimple-range.h" #include "tree-ssa-loop-niter.h" #include "gimple-fold.h" #include "regs.h" @@ -4791,7 +4792,9 @@ vect_gen_widened_results_half (vec_info *vinfo, enum tree_code code, /* Create vectorized demotion statements for vector operands from VEC_OPRNDS. For multi-step conversions store the resulting vectors and call the function - recursively. */ + recursively. When NARROW_SRC_P is true, there's still a conversion after + narrowing, don't store the vectors in the SLP_NODE or in vector info of + the scalar statement(or in STMT_VINFO_RELATED_STMT chain). 
*/ static void vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, @@ -4799,7 +4802,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, stmt_vec_info stmt_info, vec<tree> &vec_dsts, gimple_stmt_iterator *gsi, - slp_tree slp_node, enum tree_code code) + slp_tree slp_node, enum tree_code code, + bool narrow_src_p) { unsigned int i; tree vop0, vop1, new_tmp, vec_dest; @@ -4815,9 +4819,9 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, new_tmp = make_ssa_name (vec_dest, new_stmt); gimple_assign_set_lhs (new_stmt, new_tmp); vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); - - if (multi_step_cvt) - /* Store the resulting vector for next recursive call. */ + if (multi_step_cvt || narrow_src_p) + /* Store the resulting vector for next recursive call, + or return the resulting vector_tmp for NARROW FLOAT_EXPR. */ (*vec_oprnds)[i/2] = new_tmp; else { @@ -4843,7 +4847,8 @@ vect_create_vectorized_demotion_stmts (vec_info *vinfo, vec<tree> *vec_oprnds, vect_create_vectorized_demotion_stmts (vinfo, vec_oprnds, multi_step_cvt - 1, stmt_info, vec_dsts, gsi, - slp_node, VEC_PACK_TRUNC_EXPR); + slp_node, VEC_PACK_TRUNC_EXPR, + narrow_src_p); } vec_dsts.quick_push (vec_dest); @@ -4988,7 +4993,15 @@ vectorizable_conversion (vec_info *vinfo, tree vectype_out, vectype_in; int ncopies, i; tree lhs_type, rhs_type; - enum { NARROW, NONE, WIDEN } modifier; + /* For conversions between floating point and integer, there're 2 NARROW + cases. NARROW_SRC is for FLOAT_EXPR, means + integer --DEMOTION--> integer --FLOAT_EXPR--> floating point. + This is safe when the range of the source integer can fit into the lower + precision. NARROW_DST is for FIX_TRUNC_EXPR, means + floating point --FIX_TRUNC_EXPR--> integer --DEMOTION--> INTEGER. + For other conversions, when there's narrowing, NARROW_DST is used as + default. 
*/ + enum { NARROW_SRC, NARROW_DST, NONE, WIDEN } modifier; vec<tree> vec_oprnds0 = vNULL; vec<tree> vec_oprnds1 = vNULL; tree vop0; @@ -5123,7 +5136,7 @@ vectorizable_conversion (vec_info *vinfo, else modifier = NONE; else if (multiple_p (nunits_out, nunits_in)) - modifier = NARROW; + modifier = NARROW_DST; else { gcc_checking_assert (multiple_p (nunits_in, nunits_out)); @@ -5135,7 +5148,7 @@ vectorizable_conversion (vec_info *vinfo, case of SLP. */ if (slp_node) ncopies = 1; - else if (modifier == NARROW) + else if (modifier == NARROW_DST) ncopies = vect_get_num_copies (loop_vinfo, vectype_out); else ncopies = vect_get_num_copies (loop_vinfo, vectype_in); @@ -5241,29 +5254,63 @@ vectorizable_conversion (vec_info *vinfo, } break; - case NARROW: + case NARROW_DST: gcc_assert (op_type == unary_op); if (supportable_narrowing_operation (code, vectype_out, vectype_in, &code1, &multi_step_cvt, &interm_types)) break; - if (code != FIX_TRUNC_EXPR - || GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) + if (GET_MODE_SIZE (lhs_mode) >= GET_MODE_SIZE (rhs_mode)) goto unsupported; - cvt_type - = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); - cvt_type = get_same_sized_vectype (cvt_type, vectype_in); - if (cvt_type == NULL_TREE) - goto unsupported; - if (!supportable_convert_operation (code, cvt_type, vectype_in, - &codecvt1)) - goto unsupported; - if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, - &code1, &multi_step_cvt, - &interm_types)) - break; + if (code == FIX_TRUNC_EXPR) + { + cvt_type + = build_nonstandard_integer_type (GET_MODE_BITSIZE (rhs_mode), 0); + cvt_type = get_same_sized_vectype (cvt_type, vectype_in); + if (cvt_type == NULL_TREE) + goto unsupported; + if (!supportable_convert_operation (code, cvt_type, vectype_in, + &codecvt1)) + goto unsupported; + if (supportable_narrowing_operation (NOP_EXPR, vectype_out, cvt_type, + &code1, &multi_step_cvt, + &interm_types)) + break; + } + /* If op0 can be represented with low 
precision integer, + truncate it to cvt_type and then do FLOAT_EXPR. */ + else if (code == FLOAT_EXPR) + { + wide_int op_min_value, op_max_value; + if (!vect_get_range_info (op0, &op_min_value, &op_max_value)) + goto unsupported; + + cvt_type + = build_nonstandard_integer_type (GET_MODE_BITSIZE (lhs_mode), 0); + if (cvt_type == NULL_TREE + || (wi::min_precision (op_max_value, SIGNED) + > TYPE_PRECISION (cvt_type)) + || (wi::min_precision (op_min_value, SIGNED) + > TYPE_PRECISION (cvt_type))) + goto unsupported; + + cvt_type = get_same_sized_vectype (cvt_type, vectype_out); + if (cvt_type == NULL_TREE) + goto unsupported; + if (!supportable_narrowing_operation (NOP_EXPR, cvt_type, vectype_in, + &code1, &multi_step_cvt, + &interm_types)) + goto unsupported; + if (supportable_convert_operation (code, vectype_out, + cvt_type, &codecvt1)) + { + modifier = NARROW_SRC; + break; + } + } + goto unsupported; default: @@ -5288,7 +5335,7 @@ vectorizable_conversion (vec_info *vinfo, vect_model_simple_cost (vinfo, stmt_info, ncopies, dt, ndts, slp_node, cost_vec); } - else if (modifier == NARROW) + else if (modifier == NARROW_SRC || modifier == NARROW_DST) { STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type; /* The final packing step produces one vector result per copy. */ @@ -5335,8 +5382,10 @@ vectorizable_conversion (vec_info *vinfo, from supportable_*_operation, and store them in the correct order for future use in vect_create_vectorized_*_stmts (). */ auto_vec<tree> vec_dsts (multi_step_cvt + 1); + bool widen_or_narrow_float_p + = cvt_type && (modifier == WIDEN || modifier == NARROW_SRC); vec_dest = vect_create_destination_var (scalar_dest, - (cvt_type && modifier == WIDEN) + widen_or_narrow_float_p ? cvt_type : vectype_out); vec_dsts.quick_push (vec_dest); @@ -5353,7 +5402,7 @@ vectorizable_conversion (vec_info *vinfo, if (cvt_type) vec_dest = vect_create_destination_var (scalar_dest, - modifier == WIDEN + widen_or_narrow_float_p ? 
vectype_out : cvt_type); int ninputs = 1; @@ -5361,7 +5410,7 @@ vectorizable_conversion (vec_info *vinfo, { if (modifier == WIDEN) ; - else if (modifier == NARROW) + else if (modifier == NARROW_SRC || modifier == NARROW_DST) { if (multi_step_cvt) ninputs = vect_pow2 (multi_step_cvt); @@ -5448,7 +5497,8 @@ vectorizable_conversion (vec_info *vinfo, } break; - case NARROW: + case NARROW_SRC: + case NARROW_DST: /* In case the vectorization factor (VF) is bigger than the number of elements that we can fit in a vectype (nunits), we have to generate more than one vector stmt - i.e - we need to "unroll" @@ -5456,7 +5506,7 @@ vectorizable_conversion (vec_info *vinfo, vect_get_vec_defs (vinfo, stmt_info, slp_node, ncopies * ninputs, op0, &vec_oprnds0); /* Arguments are ready. Create the new vector stmts. */ - if (cvt_type) + if (cvt_type && modifier == NARROW_DST) FOR_EACH_VEC_ELT (vec_oprnds0, i, vop0) { gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); @@ -5470,7 +5520,30 @@ vectorizable_conversion (vec_info *vinfo, vect_create_vectorized_demotion_stmts (vinfo, &vec_oprnds0, multi_step_cvt, stmt_info, vec_dsts, gsi, - slp_node, code1); + slp_node, code1, + modifier == NARROW_SRC); + /* After demoting op0 to cvt_type, convert it to dest. */ + if (cvt_type && code == FLOAT_EXPR) + { + for (unsigned int i = 0; i != vec_oprnds0.length() / 2; i++) + { + /* Arguments are ready, create the new vector stmt. */ + gcc_assert (TREE_CODE_LENGTH (codecvt1) == unary_op); + gassign *new_stmt + = gimple_build_assign (vec_dest, codecvt1, vec_oprnds0[i]); + new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi); + + /* This is the last step of the conversion sequence. Store the + vectors in SLP_NODE or in vector info of the scalar statement + (or in STMT_VINFO_RELATED_STMT chain). 
*/ + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); + else + STMT_VINFO_VEC_STMTS (stmt_info).safe_push (new_stmt); + } + } break; } if (!slp_node) diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h index 9cf2fb23fe3..27fa2306a5e 100644 --- a/gcc/tree-vectorizer.h +++ b/gcc/tree-vectorizer.h @@ -2384,6 +2384,7 @@ extern bool compatible_calls_p (gcall *, gcall *); /* In tree-vect-patterns.cc. */ extern void vect_mark_pattern_stmts (vec_info *, stmt_vec_info, gimple *, tree); +extern bool vect_get_range_info (tree, wide_int*, wide_int*); /* Pattern recognition functions. Additional pattern recognition functions can (and will) be added