@@ -90,6 +90,7 @@ enum aarch64_function_type {
/* SVE vector register sizes. */
enum aarch64_sve_vector_bits_enum {
SVE_SCALABLE,
+ SVE_NOT_IMPLEMENTED = SVE_SCALABLE,
SVE_128 = 128,
SVE_256 = 256,
SVE_512 = 512,
@@ -252,6 +252,10 @@ struct tune_params
const struct cpu_vector_cost *vec_costs;
const struct cpu_branch_cost *branch_costs;
const struct cpu_approx_modes *approx_modes;
+ /* Width of the SVE registers or SVE_NOT_IMPLEMENTED if not applicable.
+ Only used for tuning decisions, does not disable VLA
+ vectorization. */
+ enum aarch64_sve_vector_bits_enum sve_width;
int memmov_cost;
int issue_rate;
unsigned int fusible_ops;
@@ -681,6 +681,7 @@ static const struct tune_params generic_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
@@ -706,6 +707,7 @@ static const struct tune_params cortexa35_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
1, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -732,6 +734,7 @@ static const struct tune_params cortexa53_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -758,6 +761,7 @@ static const struct tune_params cortexa57_tunings =
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -784,6 +788,7 @@ static const struct tune_params cortexa72_tunings =
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -810,6 +815,7 @@ static const struct tune_params cortexa73_tunings =
&cortexa57_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost. */
2, /* issue_rate. */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -838,6 +844,7 @@ static const struct tune_params exynosm1_tunings =
&exynosm1_vector_cost,
&generic_branch_cost,
&exynosm1_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_AES_AESMC), /* fusible_ops */
@@ -863,6 +870,7 @@ static const struct tune_params thunderxt88_tunings =
&thunderx_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
@@ -888,6 +896,7 @@ static const struct tune_params thunderx_tunings =
&thunderx_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fusible_ops */
@@ -914,6 +923,7 @@ static const struct tune_params tsv110_tunings =
&tsv110_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH
@@ -940,6 +950,7 @@ static const struct tune_params xgene1_tunings =
&xgene1_vector_cost,
&generic_branch_cost,
&xgene1_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
@@ -965,6 +976,7 @@ static const struct tune_params emag_tunings =
&xgene1_vector_cost,
&generic_branch_cost,
&xgene1_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fusible_ops */
@@ -990,6 +1002,7 @@ static const struct tune_params qdf24xx_tunings =
&qdf24xx_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -1018,6 +1031,7 @@ static const struct tune_params saphira_tunings =
&generic_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
@@ -1044,6 +1058,7 @@ static const struct tune_params thunderx2t99_tunings =
&thunderx2t99_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
+ SVE_NOT_IMPLEMENTED, /* sve_width */
4, /* memmov_cost. */
4, /* issue_rate. */
(AARCH64_FUSE_CMP_BRANCH | AARCH64_FUSE_AES_AESMC
@@ -17862,6 +17877,25 @@ aarch64_speculation_safe_value (machine_mode mode,
return result;
}
+/* Implement TARGET_ESTIMATED_POLY_VALUE.
+ Look into the tuning structure for an estimate.
+ VAL.coeffs[1] is multiplied by the number of VQ chunks over the initial
+ Advanced SIMD 128 bits. */
+
+static HOST_WIDE_INT
+aarch64_estimated_poly_value (poly_int64 val)
+{
+ enum aarch64_sve_vector_bits_enum width_source
+ = aarch64_tune_params.sve_width;
+
+ /* SVE_SCALABLE (aliased by SVE_NOT_IMPLEMENTED) means the tuning
+ if (width_source == SVE_SCALABLE)
+ return default_estimated_poly_value (val);
+
+ HOST_WIDE_INT over_128 = width_source - 128;
+ return val.coeffs[0] + val.coeffs[1] * over_128 / 128;
+}
+
/* Target-specific selftests. */
#if CHECKING_P
@@ -18341,6 +18375,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SPECULATION_SAFE_VALUE
#define TARGET_SPECULATION_SAFE_VALUE aarch64_speculation_safe_value
+#undef TARGET_ESTIMATED_POLY_VALUE
+#define TARGET_ESTIMATED_POLY_VALUE aarch64_estimated_poly_value
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests