| Message ID | mpt8s1g3xnl.fsf_-_@arm.com |
| --- | --- |
| State | New |
| Series | vect: Move costing helpers from aarch64 code |
On Thu, Aug 5, 2021 at 2:04 PM Richard Sandiford <richard.sandiford@arm.com> wrote: > > Richard Biener <richard.guenther@gmail.com> writes: > > On Tue, Aug 3, 2021 at 2:09 PM Richard Sandiford via Gcc-patches > > <gcc-patches@gcc.gnu.org> wrote: > >> > >> When the vectoriser scalarises a strided store, it counts one > >> scalar_store for each element plus one vec_to_scalar extraction > >> for each element. However, extracting element 0 is free on AArch64, > >> so it should have zero cost. > >> > >> I don't have a testcase that requires this for existing -mtune > >> options, but it becomes more important with a later patch. > >> > >> gcc/ > >> * config/aarch64/aarch64.c (aarch64_is_store_elt_extraction): New > >> function, split out from... > >> (aarch64_detect_vector_stmt_subtype): ...here. > >> (aarch64_add_stmt_cost): Treat extracting element 0 as free. > >> --- > >> gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++--- > >> 1 file changed, 19 insertions(+), 3 deletions(-) > >> > >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > >> index 36f11808916..084f8caa0da 100644 > >> --- a/gcc/config/aarch64/aarch64.c > >> +++ b/gcc/config/aarch64/aarch64.c > >> @@ -14622,6 +14622,18 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, > >> } > >> } > >> > >> +/* Return true if an operaton of kind KIND for STMT_INFO represents > >> + the extraction of an element from a vector in preparation for > >> + storing the element to memory. */ > >> +static bool > >> +aarch64_is_store_elt_extraction (vect_cost_for_stmt kind, > >> + stmt_vec_info stmt_info) > >> +{ > >> + return (kind == vec_to_scalar > >> + && STMT_VINFO_DATA_REF (stmt_info) > >> + && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); > >> +} > > > > It would be nice to put functions like this in tree-vectorizer.h in some > > section marked with a comment to contain helpers for the target > > add_stmt_cost. > > Yeah, I guess that would avoid pointless cut-&-paste between targets. > How does this look? Tested on aarch64-linux-gnu and x86_64-linux-gnu. Looks good besides ... > Thanks, > Richard > > > gcc/ > * tree-vectorizer.h (vect_is_store_elt_extraction, vect_is_reduction) > (vect_reduc_type, vect_embedded_comparison_type, vect_comparison_type) > (vect_is_extending_load, vect_is_integer_truncation): New functions, > moved from aarch64.c but given different names. > * config/aarch64/aarch64.c (aarch64_is_store_elt_extraction) > (aarch64_is_reduction, aarch64_reduc_type) > (aarch64_embedded_comparison_type, aarch64_comparison_type) > (aarch64_extending_load_p, aarch64_integer_truncation_p): Delete > in favor of the above. Update callers accordingly. > --- > gcc/config/aarch64/aarch64.c | 125 ++++------------------------------- > gcc/tree-vectorizer.h | 104 +++++++++++++++++++++++++++++ > 2 files changed, 118 insertions(+), 111 deletions(-) > > diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h > index deb22477e28..fd8681747ca 100644 > --- a/gcc/tree-vectorizer.h > +++ b/gcc/tree-vectorizer.h > @@ -2192,4 +2192,108 @@ extern vect_pattern_decl_t slp_patterns[]; > /* Number of supported pattern matchers. */ > extern size_t num__slp_patterns; > > +/* ---------------------------------------------------------------------- > + Target support routines > + ----------------------------------------------------------------------- > + The following routines are provided to simplify costing decisions in > + target code. Please add more as needed. 
*/ > + > +/* Return true if an operaton of kind KIND for STMT_INFO represents > + the extraction of an element from a vector in preparation for > + storing the element to memory. */ > +inline bool > +vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info) > +{ > + return (kind == vec_to_scalar > + && STMT_VINFO_DATA_REF (stmt_info) > + && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); > +} > + > +/* Return true if STMT_INFO represents part of a reduction. */ > +inline bool > +vect_is_reduction (stmt_vec_info stmt_info) > +{ > + return (STMT_VINFO_REDUC_DEF (stmt_info) > + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))); > +} > + > +/* If STMT_INFO describes a reduction, return the type of reduction > + it describes, otherwise return -1. */ > +inline int it's not clear what 'type of reduction' is - why not return enum vect_reduction_type? Because of the -1? Maybe we can simply add a NOT_REDUCTION member to the enum? Or simply adjust the comment as "return the vect_reduction_type of the reduction it describes, otherwise return -1"? > +vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) > +{ > + if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) > + if (STMT_VINFO_REDUC_DEF (stmt_info)) > + { > + stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); > + return int (STMT_VINFO_REDUC_TYPE (reduc_info)); > + } > + return -1; > +} > + > +/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the > + scalar type of the values being compared. Return null otherwise. */ > +inline tree > +vect_embedded_comparison_type (stmt_vec_info stmt_info) > +{ > + if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) > + if (gimple_assign_rhs_code (assign) == COND_EXPR) > + { > + tree cond = gimple_assign_rhs1 (assign); > + if (COMPARISON_CLASS_P (cond)) > + return TREE_TYPE (TREE_OPERAND (cond, 0)); > + } > + return NULL_TREE; > +} > + > +/* If STMT_INFO is a comparison or contains an embedded comparison, return the > + scalar type of the values being compared. Return null otherwise. */ > +inline tree > +vect_comparison_type (stmt_vec_info stmt_info) > +{ > + if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) > + if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) > + return TREE_TYPE (gimple_assign_rhs1 (assign)); > + return vect_embedded_comparison_type (stmt_info); > +} > + > +/* Return true if STMT_INFO extends the result of a load. */ > +inline bool > +vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info) > +{ > + /* Although this is quite large for an inline function, this part > + at least should be inline. */ > + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); > + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) > + return false; > + > + tree rhs = gimple_assign_rhs1 (stmt_info->stmt); > + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); > + tree rhs_type = TREE_TYPE (rhs); > + if (!INTEGRAL_TYPE_P (lhs_type) > + || !INTEGRAL_TYPE_P (rhs_type) > + || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) > + return false; > + > + stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); > + return (def_stmt_info > + && STMT_VINFO_DATA_REF (def_stmt_info) > + && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); > +} > + > +/* Return true if STMT_INFO is an integer truncation. 
*/ > +inline bool > +vect_is_integer_truncation (stmt_vec_info stmt_info) > +{ > + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); > + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) > + return false; > + > + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); > + tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); > + return (INTEGRAL_TYPE_P (lhs_type) > + && INTEGRAL_TYPE_P (rhs_type) > + && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)); > +} > + > #endif /* GCC_TREE_VECTORIZER_H */ > diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c > index e02cbcbcb38..a4456a86764 100644 > --- a/gcc/config/aarch64/aarch64.c > +++ b/gcc/config/aarch64/aarch64.c > @@ -14790,40 +14790,6 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, > } > } > > -/* Return true if an operaton of kind KIND for STMT_INFO represents > - the extraction of an element from a vector in preparation for > - storing the element to memory. */ > -static bool > -aarch64_is_store_elt_extraction (vect_cost_for_stmt kind, > - stmt_vec_info stmt_info) > -{ > - return (kind == vec_to_scalar > - && STMT_VINFO_DATA_REF (stmt_info) > - && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); > -} > - > -/* Return true if STMT_INFO represents part of a reduction. */ > -static bool > -aarch64_is_reduction (stmt_vec_info stmt_info) > -{ > - return (STMT_VINFO_REDUC_DEF (stmt_info) > - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))); > -} > - > -/* If STMT_INFO describes a reduction, return the type of reduction > - it describes, otherwise return -1. */ > -static int > -aarch64_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) > -{ > - if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) > - if (STMT_VINFO_REDUC_DEF (stmt_info)) > - { > - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); > - return int (STMT_VINFO_REDUC_TYPE (reduc_info)); > - } > - return -1; > -} > - > /* Return true if an access of kind KIND for STMT_INFO represents one > vector of an LD[234] or ST[234] operation. Return the total number of > vectors (2, 3 or 4) if so, otherwise return a value outside that range. */ > @@ -14844,32 +14810,6 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info) > return 0; > } > > -/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the > - scalar type of the values being compared. Return null otherwise. */ > -static tree > -aarch64_embedded_comparison_type (stmt_vec_info stmt_info) > -{ > - if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) > - if (gimple_assign_rhs_code (assign) == COND_EXPR) > - { > - tree cond = gimple_assign_rhs1 (assign); > - if (COMPARISON_CLASS_P (cond)) > - return TREE_TYPE (TREE_OPERAND (cond, 0)); > - } > - return NULL_TREE; > -} > - > -/* If STMT_INFO is a comparison or contains an embedded comparison, return the > - scalar type of the values being compared. Return null otherwise. */ > -static tree > -aarch64_comparison_type (stmt_vec_info stmt_info) > -{ > - if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) > - if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) > - return TREE_TYPE (gimple_assign_rhs1 (assign)); > - return aarch64_embedded_comparison_type (stmt_info); > -} > - > /* Return true if creating multiple copies of STMT_INFO for Advanced SIMD > vectors would produce a series of LDP or STP operations. KIND is the > kind of statement that STMT_INFO represents. 
*/ > @@ -14896,43 +14836,6 @@ aarch64_advsimd_ldp_stp_p (enum vect_cost_for_stmt kind, > return is_gimple_assign (stmt_info->stmt); > } > > -/* Return true if STMT_INFO extends the result of a load. */ > -static bool > -aarch64_extending_load_p (class vec_info *vinfo, stmt_vec_info stmt_info) > -{ > - gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); > - if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) > - return false; > - > - tree rhs = gimple_assign_rhs1 (stmt_info->stmt); > - tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); > - tree rhs_type = TREE_TYPE (rhs); > - if (!INTEGRAL_TYPE_P (lhs_type) > - || !INTEGRAL_TYPE_P (rhs_type) > - || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) > - return false; > - > - stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); > - return (def_stmt_info > - && STMT_VINFO_DATA_REF (def_stmt_info) > - && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); > -} > - > -/* Return true if STMT_INFO is an integer truncation. */ > -static bool > -aarch64_integer_truncation_p (stmt_vec_info stmt_info) > -{ > - gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); > - if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) > - return false; > - > - tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); > - tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); > - return (INTEGRAL_TYPE_P (lhs_type) > - && INTEGRAL_TYPE_P (rhs_type) > - && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)); > -} > - > /* Return true if STMT_INFO is the second part of a two-statement multiply-add > or multiply-subtract sequence that might be suitable for fusing into a > single instruction. If VEC_FLAGS is zero, analyze the operation as > @@ -15035,7 +14938,7 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, > tree vectype, > const sve_vec_cost *sve_costs) > { > - switch (aarch64_reduc_type (vinfo, stmt_info)) > + switch (vect_reduc_type (vinfo, stmt_info)) > { > case EXTRACT_LAST_REDUCTION: > return sve_costs->clast_cost; > @@ -15126,7 +15029,7 @@ aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, > { > /* Detect an extension of a loaded value. In general, we'll be able to fuse > the extension with the load. */ > - if (kind == scalar_stmt && aarch64_extending_load_p (vinfo, stmt_info)) > + if (kind == scalar_stmt && vect_is_extending_load (vinfo, stmt_info)) > return 0; > > return stmt_cost; > @@ -15158,7 +15061,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, > /* Detect cases in which vec_to_scalar is describing the extraction of a > vector element in preparation for a scalar store. The store itself is > costed separately. */ > - if (aarch64_is_store_elt_extraction (kind, stmt_info)) > + if (vect_is_store_elt_extraction (kind, stmt_info)) > return simd_costs->store_elt_extra_cost; > > /* Detect SVE gather loads, which are costed as a single scalar_load > @@ -15197,7 +15100,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, > instruction like FADDP or MAXV. */ > if (kind == vec_to_scalar > && where == vect_epilogue > - && aarch64_is_reduction (stmt_info)) > + && vect_is_reduction (stmt_info)) > switch (GET_MODE_INNER (TYPE_MODE (vectype))) > { > case E_QImode: > @@ -15247,12 +15150,12 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, > on the fly. 
Optimistically assume that a load followed by an extension > will fold to this form during combine, and that the extension therefore > comes for free. */ > - if (kind == vector_stmt && aarch64_extending_load_p (vinfo, stmt_info)) > + if (kind == vector_stmt && vect_is_extending_load (vinfo, stmt_info)) > stmt_cost = 0; > > /* For similar reasons, vector_stmt integer truncations are a no-op, > because we can just ignore the unused upper bits of the source. */ > - if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info)) > + if (kind == vector_stmt && vect_is_integer_truncation (stmt_info)) > stmt_cost = 0; > > /* Advanced SIMD can load and store pairs of registers using LDP and STP, > @@ -15327,7 +15230,7 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info, > } > > if (kind == vector_stmt || kind == vec_to_scalar) > - if (tree cmp_type = aarch64_embedded_comparison_type (stmt_info)) > + if (tree cmp_type = vect_embedded_comparison_type (stmt_info)) > { > if (FLOAT_TYPE_P (cmp_type)) > stmt_cost += simd_costs->fp_stmt_cost; > @@ -15337,7 +15240,7 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info, > } > > if (kind == scalar_stmt) > - if (tree cmp_type = aarch64_embedded_comparison_type (stmt_info)) > + if (tree cmp_type = vect_embedded_comparison_type (stmt_info)) > { > if (FLOAT_TYPE_P (cmp_type)) > stmt_cost += aarch64_tune_params.vec_costs->scalar_fp_stmt_cost; > @@ -15387,12 +15290,12 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, > /* Calculate the minimum cycles per iteration imposed by a reduction > operation. */ > if ((kind == vector_stmt || kind == vec_to_scalar) > - && aarch64_is_reduction (stmt_info)) > + && vect_is_reduction (stmt_info)) > { > unsigned int base > = aarch64_in_loop_reduction_latency (vinfo, stmt_info, vectype, > vec_flags); > - if (aarch64_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION) > + if (vect_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION) > { > if (aarch64_sve_mode_p (TYPE_MODE (vectype))) > { > @@ -15491,7 +15394,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, > > /* Add any embedded comparison operations. */ > if ((kind == scalar_stmt || kind == vector_stmt || kind == vec_to_scalar) > - && aarch64_embedded_comparison_type (stmt_info)) > + && vect_embedded_comparison_type (stmt_info)) > ops->general_ops += num_copies; > > /* Detect COND_REDUCTIONs and things that would need to become > @@ -15500,7 +15403,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, > have only accounted for one. */ > if (vec_flags && (kind == vector_stmt || kind == vec_to_scalar)) > { > - int reduc_type = aarch64_reduc_type (vinfo, stmt_info); > + int reduc_type = vect_reduc_type (vinfo, stmt_info); > if ((reduc_type == EXTRACT_LAST_REDUCTION && (vec_flags & VEC_ADVSIMD)) > || reduc_type == COND_REDUCTION) > ops->general_ops += num_copies; > @@ -15508,7 +15411,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, > > /* Count the predicate operations needed by an SVE comparison. */ > if (sve_issue && (kind == vector_stmt || kind == vec_to_scalar)) > - if (tree type = aarch64_comparison_type (stmt_info)) > + if (tree type = vect_comparison_type (stmt_info)) > { > unsigned int base = (FLOAT_TYPE_P (type) > ? 
sve_issue->fp_cmp_pred_ops > @@ -15586,7 +15489,7 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count, > /* If we scalarize a strided store, the vectorizer costs one > vec_to_scalar for each element. However, we can store the first > element using an FP store without a separate extract step. */ > - if (aarch64_is_store_elt_extraction (kind, stmt_info)) > + if (vect_is_store_elt_extraction (kind, stmt_info)) > count -= 1; > > stmt_cost = aarch64_detect_scalar_stmt_subtype
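The point of moving these helpers into tree-vectorizer.h is that any backend's cost hooks can reuse them. As a minimal sketch of what that looks like outside aarch64.c: the function below is hypothetical (simplified signature, made-up cost adjustments); only the `vect_*` helper calls are the ones the patch adds.

```c
/* Hypothetical example of a backend cost adjustment reusing the
   helpers that this patch moves into tree-vectorizer.h.  Mirrors the
   AArch64 reasoning quoted above, but the function itself and the
   cost values are illustrative only.  */

static unsigned
example_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
			  stmt_vec_info stmt_info, unsigned stmt_cost)
{
  /* Extracting lane 0 ahead of a scalar store is free on some
     targets, so don't cost the vec_to_scalar separately.  */
  if (vect_is_store_elt_extraction (kind, stmt_info))
    return 0;

  /* Optimistically assume a sign/zero extension fuses with the
     load that feeds it and therefore comes for free.  */
  if (kind == vector_stmt && vect_is_extending_load (vinfo, stmt_info))
    return 0;

  /* Charge statements that form part of a reduction a little extra
     for the eventual cross-lane reduction step.  */
  if (stmt_info && vect_is_reduction (stmt_info))
    stmt_cost += 1;

  return stmt_cost;
}
```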
Richard Biener <richard.guenther@gmail.com> writes: > On Thu, Aug 5, 2021 at 2:04 PM Richard Sandiford > <richard.sandiford@arm.com> wrote: >> >> Richard Biener <richard.guenther@gmail.com> writes: >> > On Tue, Aug 3, 2021 at 2:09 PM Richard Sandiford via Gcc-patches >> > <gcc-patches@gcc.gnu.org> wrote: >> >> >> >> When the vectoriser scalarises a strided store, it counts one >> >> scalar_store for each element plus one vec_to_scalar extraction >> >> for each element. However, extracting element 0 is free on AArch64, >> >> so it should have zero cost. >> >> >> >> I don't have a testcase that requires this for existing -mtune >> >> options, but it becomes more important with a later patch. >> >> >> >> gcc/ >> >> * config/aarch64/aarch64.c (aarch64_is_store_elt_extraction): New >> >> function, split out from... >> >> (aarch64_detect_vector_stmt_subtype): ...here. >> >> (aarch64_add_stmt_cost): Treat extracting element 0 as free. >> >> --- >> >> gcc/config/aarch64/aarch64.c | 22 +++++++++++++++++++--- >> >> 1 file changed, 19 insertions(+), 3 deletions(-) >> >> >> >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> >> index 36f11808916..084f8caa0da 100644 >> >> --- a/gcc/config/aarch64/aarch64.c >> >> +++ b/gcc/config/aarch64/aarch64.c >> >> @@ -14622,6 +14622,18 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, >> >> } >> >> } >> >> >> >> +/* Return true if an operaton of kind KIND for STMT_INFO represents >> >> + the extraction of an element from a vector in preparation for >> >> + storing the element to memory. */ >> >> +static bool >> >> +aarch64_is_store_elt_extraction (vect_cost_for_stmt kind, >> >> + stmt_vec_info stmt_info) >> >> +{ >> >> + return (kind == vec_to_scalar >> >> + && STMT_VINFO_DATA_REF (stmt_info) >> >> + && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); >> >> +} >> > >> > It would be nice to put functions like this in tree-vectorizer.h in some >> > section marked with a comment to contain helpers for the target >> > add_stmt_cost. >> >> Yeah, I guess that would avoid pointless cut-&-paste between targets. >> How does this look? Tested on aarch64-linux-gnu and x86_64-linux-gnu. > > Looks good besides ... > >> Thanks, >> Richard >> >> >> gcc/ >> * tree-vectorizer.h (vect_is_store_elt_extraction, vect_is_reduction) >> (vect_reduc_type, vect_embedded_comparison_type, vect_comparison_type) >> (vect_is_extending_load, vect_is_integer_truncation): New functions, >> moved from aarch64.c but given different names. >> * config/aarch64/aarch64.c (aarch64_is_store_elt_extraction) >> (aarch64_is_reduction, aarch64_reduc_type) >> (aarch64_embedded_comparison_type, aarch64_comparison_type) >> (aarch64_extending_load_p, aarch64_integer_truncation_p): Delete >> in favor of the above. Update callers accordingly. >> --- >> gcc/config/aarch64/aarch64.c | 125 ++++------------------------------- >> gcc/tree-vectorizer.h | 104 +++++++++++++++++++++++++++++ >> 2 files changed, 118 insertions(+), 111 deletions(-) >> >> diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h >> index deb22477e28..fd8681747ca 100644 >> --- a/gcc/tree-vectorizer.h >> +++ b/gcc/tree-vectorizer.h >> @@ -2192,4 +2192,108 @@ extern vect_pattern_decl_t slp_patterns[]; >> /* Number of supported pattern matchers. 
*/ >> extern size_t num__slp_patterns; >> >> +/* ---------------------------------------------------------------------- >> + Target support routines >> + ----------------------------------------------------------------------- >> + The following routines are provided to simplify costing decisions in >> + target code. Please add more as needed. */ >> + >> +/* Return true if an operaton of kind KIND for STMT_INFO represents >> + the extraction of an element from a vector in preparation for >> + storing the element to memory. */ >> +inline bool >> +vect_is_store_elt_extraction (vect_cost_for_stmt kind, stmt_vec_info stmt_info) >> +{ >> + return (kind == vec_to_scalar >> + && STMT_VINFO_DATA_REF (stmt_info) >> + && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); >> +} >> + >> +/* Return true if STMT_INFO represents part of a reduction. */ >> +inline bool >> +vect_is_reduction (stmt_vec_info stmt_info) >> +{ >> + return (STMT_VINFO_REDUC_DEF (stmt_info) >> + || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))); >> +} >> + >> +/* If STMT_INFO describes a reduction, return the type of reduction >> + it describes, otherwise return -1. */ >> +inline int > > it's not clear what 'type of reduction' is - why not return enum > vect_reduction_type? > Because of the -1? Maybe we can simply add a NOT_REDUCTION member > to the enum? Yeah, because of the -1. I don't like the idea of adding a fake value since it complicates switch statements that handle all non-fake values. > Or simply adjust the comment as "return the vect_reduction_type > of the reduction it describes, otherwise return -1"? Ah, yeah, that sounds better. I've pushed the patch with that change. Thanks, Richard > >> +vect_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) >> +{ >> + if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) >> + if (STMT_VINFO_REDUC_DEF (stmt_info)) >> + { >> + stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); >> + return int (STMT_VINFO_REDUC_TYPE (reduc_info)); >> + } >> + return -1; >> +} >> + >> +/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the >> + scalar type of the values being compared. Return null otherwise. */ >> +inline tree >> +vect_embedded_comparison_type (stmt_vec_info stmt_info) >> +{ >> + if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) >> + if (gimple_assign_rhs_code (assign) == COND_EXPR) >> + { >> + tree cond = gimple_assign_rhs1 (assign); >> + if (COMPARISON_CLASS_P (cond)) >> + return TREE_TYPE (TREE_OPERAND (cond, 0)); >> + } >> + return NULL_TREE; >> +} >> + >> +/* If STMT_INFO is a comparison or contains an embedded comparison, return the >> + scalar type of the values being compared. Return null otherwise. */ >> +inline tree >> +vect_comparison_type (stmt_vec_info stmt_info) >> +{ >> + if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) >> + if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) >> + return TREE_TYPE (gimple_assign_rhs1 (assign)); >> + return vect_embedded_comparison_type (stmt_info); >> +} >> + >> +/* Return true if STMT_INFO extends the result of a load. */ >> +inline bool >> +vect_is_extending_load (class vec_info *vinfo, stmt_vec_info stmt_info) >> +{ >> + /* Although this is quite large for an inline function, this part >> + at least should be inline. 
*/ >> + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); >> + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) >> + return false; >> + >> + tree rhs = gimple_assign_rhs1 (stmt_info->stmt); >> + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); >> + tree rhs_type = TREE_TYPE (rhs); >> + if (!INTEGRAL_TYPE_P (lhs_type) >> + || !INTEGRAL_TYPE_P (rhs_type) >> + || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) >> + return false; >> + >> + stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); >> + return (def_stmt_info >> + && STMT_VINFO_DATA_REF (def_stmt_info) >> + && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); >> +} >> + >> +/* Return true if STMT_INFO is an integer truncation. */ >> +inline bool >> +vect_is_integer_truncation (stmt_vec_info stmt_info) >> +{ >> + gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); >> + if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) >> + return false; >> + >> + tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); >> + tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); >> + return (INTEGRAL_TYPE_P (lhs_type) >> + && INTEGRAL_TYPE_P (rhs_type) >> + && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)); >> +} >> + >> #endif /* GCC_TREE_VECTORIZER_H */ >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >> index e02cbcbcb38..a4456a86764 100644 >> --- a/gcc/config/aarch64/aarch64.c >> +++ b/gcc/config/aarch64/aarch64.c >> @@ -14790,40 +14790,6 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, >> } >> } >> >> -/* Return true if an operaton of kind KIND for STMT_INFO represents >> - the extraction of an element from a vector in preparation for >> - storing the element to memory. */ >> -static bool >> -aarch64_is_store_elt_extraction (vect_cost_for_stmt kind, >> - stmt_vec_info stmt_info) >> -{ >> - return (kind == vec_to_scalar >> - && STMT_VINFO_DATA_REF (stmt_info) >> - && DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))); >> -} >> - >> -/* Return true if STMT_INFO represents part of a reduction. */ >> -static bool >> -aarch64_is_reduction (stmt_vec_info stmt_info) >> -{ >> - return (STMT_VINFO_REDUC_DEF (stmt_info) >> - || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))); >> -} >> - >> -/* If STMT_INFO describes a reduction, return the type of reduction >> - it describes, otherwise return -1. */ >> -static int >> -aarch64_reduc_type (vec_info *vinfo, stmt_vec_info stmt_info) >> -{ >> - if (loop_vec_info loop_vinfo = dyn_cast<loop_vec_info> (vinfo)) >> - if (STMT_VINFO_REDUC_DEF (stmt_info)) >> - { >> - stmt_vec_info reduc_info = info_for_reduction (loop_vinfo, stmt_info); >> - return int (STMT_VINFO_REDUC_TYPE (reduc_info)); >> - } >> - return -1; >> -} >> - >> /* Return true if an access of kind KIND for STMT_INFO represents one >> vector of an LD[234] or ST[234] operation. Return the total number of >> vectors (2, 3 or 4) if so, otherwise return a value outside that range. */ >> @@ -14844,32 +14810,6 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info) >> return 0; >> } >> >> -/* If STMT_INFO is a COND_EXPR that includes an embedded comparison, return the >> - scalar type of the values being compared. Return null otherwise. 
*/ >> -static tree >> -aarch64_embedded_comparison_type (stmt_vec_info stmt_info) >> -{ >> - if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) >> - if (gimple_assign_rhs_code (assign) == COND_EXPR) >> - { >> - tree cond = gimple_assign_rhs1 (assign); >> - if (COMPARISON_CLASS_P (cond)) >> - return TREE_TYPE (TREE_OPERAND (cond, 0)); >> - } >> - return NULL_TREE; >> -} >> - >> -/* If STMT_INFO is a comparison or contains an embedded comparison, return the >> - scalar type of the values being compared. Return null otherwise. */ >> -static tree >> -aarch64_comparison_type (stmt_vec_info stmt_info) >> -{ >> - if (auto *assign = dyn_cast<gassign *> (stmt_info->stmt)) >> - if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison) >> - return TREE_TYPE (gimple_assign_rhs1 (assign)); >> - return aarch64_embedded_comparison_type (stmt_info); >> -} >> - >> /* Return true if creating multiple copies of STMT_INFO for Advanced SIMD >> vectors would produce a series of LDP or STP operations. KIND is the >> kind of statement that STMT_INFO represents. */ >> @@ -14896,43 +14836,6 @@ aarch64_advsimd_ldp_stp_p (enum vect_cost_for_stmt kind, >> return is_gimple_assign (stmt_info->stmt); >> } >> >> -/* Return true if STMT_INFO extends the result of a load. */ >> -static bool >> -aarch64_extending_load_p (class vec_info *vinfo, stmt_vec_info stmt_info) >> -{ >> - gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); >> - if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) >> - return false; >> - >> - tree rhs = gimple_assign_rhs1 (stmt_info->stmt); >> - tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); >> - tree rhs_type = TREE_TYPE (rhs); >> - if (!INTEGRAL_TYPE_P (lhs_type) >> - || !INTEGRAL_TYPE_P (rhs_type) >> - || TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type)) >> - return false; >> - >> - stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); >> - return (def_stmt_info >> - && STMT_VINFO_DATA_REF (def_stmt_info) >> - && DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info))); >> -} >> - >> -/* Return true if STMT_INFO is an integer truncation. */ >> -static bool >> -aarch64_integer_truncation_p (stmt_vec_info stmt_info) >> -{ >> - gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); >> - if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))) >> - return false; >> - >> - tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign)); >> - tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign)); >> - return (INTEGRAL_TYPE_P (lhs_type) >> - && INTEGRAL_TYPE_P (rhs_type) >> - && TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)); >> -} >> - >> /* Return true if STMT_INFO is the second part of a two-statement multiply-add >> or multiply-subtract sequence that might be suitable for fusing into a >> single instruction. If VEC_FLAGS is zero, analyze the operation as >> @@ -15035,7 +14938,7 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, >> tree vectype, >> const sve_vec_cost *sve_costs) >> { >> - switch (aarch64_reduc_type (vinfo, stmt_info)) >> + switch (vect_reduc_type (vinfo, stmt_info)) >> { >> case EXTRACT_LAST_REDUCTION: >> return sve_costs->clast_cost; >> @@ -15126,7 +15029,7 @@ aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, >> { >> /* Detect an extension of a loaded value. In general, we'll be able to fuse >> the extension with the load. 
*/ >> - if (kind == scalar_stmt && aarch64_extending_load_p (vinfo, stmt_info)) >> + if (kind == scalar_stmt && vect_is_extending_load (vinfo, stmt_info)) >> return 0; >> >> return stmt_cost; >> @@ -15158,7 +15061,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, >> /* Detect cases in which vec_to_scalar is describing the extraction of a >> vector element in preparation for a scalar store. The store itself is >> costed separately. */ >> - if (aarch64_is_store_elt_extraction (kind, stmt_info)) >> + if (vect_is_store_elt_extraction (kind, stmt_info)) >> return simd_costs->store_elt_extra_cost; >> >> /* Detect SVE gather loads, which are costed as a single scalar_load >> @@ -15197,7 +15100,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, >> instruction like FADDP or MAXV. */ >> if (kind == vec_to_scalar >> && where == vect_epilogue >> - && aarch64_is_reduction (stmt_info)) >> + && vect_is_reduction (stmt_info)) >> switch (GET_MODE_INNER (TYPE_MODE (vectype))) >> { >> case E_QImode: >> @@ -15247,12 +15150,12 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, >> on the fly. Optimistically assume that a load followed by an extension >> will fold to this form during combine, and that the extension therefore >> comes for free. */ >> - if (kind == vector_stmt && aarch64_extending_load_p (vinfo, stmt_info)) >> + if (kind == vector_stmt && vect_is_extending_load (vinfo, stmt_info)) >> stmt_cost = 0; >> >> /* For similar reasons, vector_stmt integer truncations are a no-op, >> because we can just ignore the unused upper bits of the source. */ >> - if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info)) >> + if (kind == vector_stmt && vect_is_integer_truncation (stmt_info)) >> stmt_cost = 0; >> >> /* Advanced SIMD can load and store pairs of registers using LDP and STP, >> @@ -15327,7 +15230,7 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info, >> } >> >> if (kind == vector_stmt || kind == vec_to_scalar) >> - if (tree cmp_type = aarch64_embedded_comparison_type (stmt_info)) >> + if (tree cmp_type = vect_embedded_comparison_type (stmt_info)) >> { >> if (FLOAT_TYPE_P (cmp_type)) >> stmt_cost += simd_costs->fp_stmt_cost; >> @@ -15337,7 +15240,7 @@ aarch64_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info, >> } >> >> if (kind == scalar_stmt) >> - if (tree cmp_type = aarch64_embedded_comparison_type (stmt_info)) >> + if (tree cmp_type = vect_embedded_comparison_type (stmt_info)) >> { >> if (FLOAT_TYPE_P (cmp_type)) >> stmt_cost += aarch64_tune_params.vec_costs->scalar_fp_stmt_cost; >> @@ -15387,12 +15290,12 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, >> /* Calculate the minimum cycles per iteration imposed by a reduction >> operation. */ >> if ((kind == vector_stmt || kind == vec_to_scalar) >> - && aarch64_is_reduction (stmt_info)) >> + && vect_is_reduction (stmt_info)) >> { >> unsigned int base >> = aarch64_in_loop_reduction_latency (vinfo, stmt_info, vectype, >> vec_flags); >> - if (aarch64_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION) >> + if (vect_reduc_type (vinfo, stmt_info) == FOLD_LEFT_REDUCTION) >> { >> if (aarch64_sve_mode_p (TYPE_MODE (vectype))) >> { >> @@ -15491,7 +15394,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, >> >> /* Add any embedded comparison operations. 
*/ >> if ((kind == scalar_stmt || kind == vector_stmt || kind == vec_to_scalar) >> - && aarch64_embedded_comparison_type (stmt_info)) >> + && vect_embedded_comparison_type (stmt_info)) >> ops->general_ops += num_copies; >> >> /* Detect COND_REDUCTIONs and things that would need to become >> @@ -15500,7 +15403,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, >> have only accounted for one. */ >> if (vec_flags && (kind == vector_stmt || kind == vec_to_scalar)) >> { >> - int reduc_type = aarch64_reduc_type (vinfo, stmt_info); >> + int reduc_type = vect_reduc_type (vinfo, stmt_info); >> if ((reduc_type == EXTRACT_LAST_REDUCTION && (vec_flags & VEC_ADVSIMD)) >> || reduc_type == COND_REDUCTION) >> ops->general_ops += num_copies; >> @@ -15508,7 +15411,7 @@ aarch64_count_ops (class vec_info *vinfo, aarch64_vector_costs *costs, >> >> /* Count the predicate operations needed by an SVE comparison. */ >> if (sve_issue && (kind == vector_stmt || kind == vec_to_scalar)) >> - if (tree type = aarch64_comparison_type (stmt_info)) >> + if (tree type = vect_comparison_type (stmt_info)) >> { >> unsigned int base = (FLOAT_TYPE_P (type) >> ? sve_issue->fp_cmp_pred_ops >> @@ -15586,7 +15489,7 @@ aarch64_add_stmt_cost (class vec_info *vinfo, void *data, int count, >> /* If we scalarize a strided store, the vectorizer costs one >> vec_to_scalar for each element. However, we can store the first >> element using an FP store without a separate extract step. */ >> - if (aarch64_is_store_elt_extraction (kind, stmt_info)) >> + if (vect_is_store_elt_extraction (kind, stmt_info)) >> count -= 1; >> >> stmt_cost = aarch64_detect_scalar_stmt_subtype
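To make the resolved return-type question concrete, here is a small illustrative fragment (the surrounding variables and penalty values are hypothetical, not from the patch) showing how a caller works with the `int` return value and its -1 "not a reduction" sentinel:

```c
/* Illustrative fragment: vect_reduc_type returns int so that -1 can
   mean "not a reduction" without adding a fake enumerator to
   enum vect_reduction_type.  The real enumerators, such as
   FOLD_LEFT_REDUCTION, compare against the int result directly.  */
int reduc_type = vect_reduc_type (vinfo, stmt_info);
if (reduc_type == FOLD_LEFT_REDUCTION)
  stmt_cost += in_order_penalty;	/* in-order reductions serialize.  */
else if (reduc_type != -1)
  stmt_cost += reduc_penalty;		/* any other reduction kind.  */
```

Keeping the sentinel outside the enum means switch statements over genuine `vect_reduction_type` values stay exhaustive without a fake case, which is the concern raised above.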