===================================================================
@@ -880,6 +880,14 @@ known_alignment_for_access_p (struct dat
return (DR_MISALIGNMENT (data_ref_info) != -1);
}
+
+/* Return true if the vectorizer cost model is unlimited. */
+static inline bool
+unlimited_cost_model ()
+{
+ return flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED;
+}
+
/* Source location */
extern LOC vect_location;
===================================================================
@@ -191,6 +191,15 @@ enum fp_contract_mode {
FP_CONTRACT_FAST = 2
};
+/* Vectorizer cost-model. */
+enum vect_cost_model {
+ VECT_COST_MODEL_UNLIMITED = 0,
+ VECT_COST_MODEL_CHEAP = 1,
+ VECT_COST_MODEL_DYNAMIC = 2,
+ VECT_COST_MODEL_DEFAULT = 3
+};
+
+
/* Different instrumentation modes. */
enum sanitize_code {
/* AddressSanitizer. */
===================================================================
@@ -1057,20 +1057,17 @@ default_add_stmt_cost (void *data, int c
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
- if (flag_vect_cost_model)
- {
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
- int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = default_builtin_vectorization_cost (kind, vectype,
misalign);
- /* Statements in an inner loop relative to the loop being
- vectorized are weighted more heavily. The value here is
- arbitrary and could potentially be improved with analysis. */
- if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
- count *= 50; /* FIXME. */
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ arbitrary and could potentially be improved with analysis. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ count *= 50; /* FIXME. */
- retval = (unsigned) (count * stmt_cost);
- cost[where] += retval;
- }
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
return retval;
}
===================================================================
@@ -2278,13 +2278,33 @@ ftree-slp-vectorize
Common Report Var(flag_tree_slp_vectorize) Optimization
Enable basic block vectorization (SLP) on trees
+fvect-cost-model=
+Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT)
+Specifies the cost model for vectorization
+
+Enum
+Name(vect_cost_model) Type(enum vect_cost_model) UnknownError(unknown vectorizer cost model %qs)
+
+EnumValue
+Enum(vect_cost_model) String(unlimited) Value(VECT_COST_MODEL_UNLIMITED)
+
+EnumValue
+Enum(vect_cost_model) String(dynamic) Value(VECT_COST_MODEL_DYNAMIC)
+
+EnumValue
+Enum(vect_cost_model) String(cheap) Value(VECT_COST_MODEL_CHEAP)
+
fvect-cost-model
-Common Report Var(flag_vect_cost_model) Optimization
-Enable use of cost model in vectorization
+Common RejectNegative Alias(fvect-cost-model=,dynamic)
+Enables the dynamic vectorizer cost model. Preserved for backward compatibility.
+
+fno-vect-cost-model
+Common RejectNegative Alias(fvect-cost-model=,unlimited)
+Enables the unlimited vectorizer cost model. Preserved for backward compatibility.
ftree-vect-loop-version
-Common Report Var(flag_tree_vect_loop_version) Init(1) Optimization
-Enable loop versioning when doing loop vectorization on trees
+Common Ignore
+Does nothing. Preserved for backward compatibility.
ftree-scev-cprop
Common Report Var(flag_tree_scev_cprop) Init(1) Optimization
===================================================================
@@ -486,6 +486,7 @@ static const struct default_options defa
{ OPT_LEVELS_2_PLUS, OPT_falign_labels, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_falign_functions, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_ftree_tail_merge, NULL, 1 },
+ { OPT_LEVELS_2_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_CHEAP },
{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_foptimize_strlen, NULL, 1 },
{ OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
@@ -500,7 +501,7 @@ static const struct default_options defa
{ OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model, NULL, 1 },
+ { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
{ OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
{ OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
@@ -825,6 +826,17 @@ finish_options (struct gcc_options *opts
}
}
+ /* Tune vectorization related parameters according to cost model. */
+ if (opts->x_flag_vect_cost_model == VECT_COST_MODEL_CHEAP)
+ {
+ maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS,
+ 6, opts->x_param_values, opts_set->x_param_values);
+ maybe_set_param_value (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS,
+ 0, opts->x_param_values, opts_set->x_param_values);
+ maybe_set_param_value (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT,
+ 0, opts->x_param_values, opts_set->x_param_values);
+ }
+
/* Set PARAM_MAX_STORES_TO_SINK to 0 if either vectorization or if-conversion
is disabled. */
if ((!opts->x_flag_tree_loop_vectorize && !opts->x_flag_tree_slp_vectorize)
@@ -1669,7 +1681,7 @@ common_handle_option (struct gcc_options
&& !opts_set->x_flag_tree_vectorize)
opts->x_flag_tree_slp_vectorize = value;
if (!opts_set->x_flag_vect_cost_model)
- opts->x_flag_vect_cost_model = value;
+ opts->x_flag_vect_cost_model = VECT_COST_MODEL_DYNAMIC;
if (!opts_set->x_flag_tree_loop_distribute_patterns)
opts->x_flag_tree_loop_distribute_patterns = value;
/* Indirect call profiling should do all useful transformations
===================================================================
@@ -811,7 +811,6 @@ ix86_option_init_struct (struct gcc_opti
opts->x_flag_pcc_struct_return = 2;
opts->x_flag_asynchronous_unwind_tables = 2;
- opts->x_flag_vect_cost_model = 1;
}
/* On the x86 -fsplit-stack and -fstack-protector both use the same
===================================================================
@@ -2168,7 +2168,7 @@ vect_slp_analyze_bb_1 (basic_block bb)
}
/* Cost model: check if the vectorization is worthwhile. */
- if (flag_vect_cost_model
+ if (!unlimited_cost_model ()
&& !vect_bb_vectorization_profitable_p (bb_vinfo))
{
if (dump_enabled_p ())
===================================================================
@@ -2680,7 +2680,7 @@ vect_estimate_min_profitable_iters (loop
void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
/* Cost model disabled. */
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model ())
{
dump_printf_loc (MSG_NOTE, vect_location, "cost model disabled.\n");
*ret_min_profitable_niters = 0;
===================================================================
@@ -1115,7 +1115,7 @@ vect_peeling_hash_insert (loop_vec_info
*new_slot = slot;
}
- if (!supportable_dr_alignment && !flag_vect_cost_model)
+ if (!supportable_dr_alignment && unlimited_cost_model ())
slot->count += VECT_MAX_COST;
}
@@ -1225,7 +1225,7 @@ vect_peeling_hash_choose_best_peeling (l
res.peel_info.dr = NULL;
res.body_cost_vec = stmt_vector_for_cost();
- if (flag_vect_cost_model)
+ if (!unlimited_cost_model ())
{
res.inside_cost = INT_MAX;
res.outside_cost = INT_MAX;
@@ -1454,7 +1454,7 @@ vect_enhance_data_refs_alignment (loop_v
vectorization factor.
We do this automtically for cost model, since we calculate cost
for every peeling option. */
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model ())
possible_npeel_number = vf /nelements;
/* Handle the aligned case. We may decide to align some other
@@ -1462,7 +1462,7 @@ vect_enhance_data_refs_alignment (loop_v
if (DR_MISALIGNMENT (dr) == 0)
{
npeel_tmp = 0;
- if (!flag_vect_cost_model)
+ if (unlimited_cost_model ())
possible_npeel_number++;
}
@@ -1795,16 +1795,14 @@ vect_enhance_data_refs_alignment (loop_v
/* (2) Versioning to force alignment. */
/* Try versioning if:
- 1) flag_tree_vect_loop_version is TRUE
- 2) optimize loop for speed
- 3) there is at least one unsupported misaligned data ref with an unknown
+ 1) optimize loop for speed
+ 2) there is at least one unsupported misaligned data ref with an unknown
misalignment, and
- 4) all misaligned data refs with a known misalignment are supported, and
- 5) the number of runtime alignment checks is within reason. */
+ 3) all misaligned data refs with a known misalignment are supported, and
+ 4) the number of runtime alignment checks is within reason. */
do_versioning =
- flag_tree_vect_loop_version
- && optimize_loop_nest_for_speed_p (loop)
+ optimize_loop_nest_for_speed_p (loop)
&& (!loop->inner); /* FORNOW */
if (do_versioning)
===================================================================
@@ -423,7 +423,7 @@ Objective-C and Objective-C++ Dialects}.
-ftree-parallelize-loops=@var{n} -ftree-pre -ftree-partial-pre -ftree-pta @gol
-ftree-reassoc -ftree-sink -ftree-slsr -ftree-sra @gol
-ftree-switch-conversion -ftree-tail-merge -ftree-ter @gol
--ftree-vect-loop-version -ftree-vectorize -ftree-vrp @gol
+-ftree-vectorize -ftree-vrp @gol
-funit-at-a-time -funroll-all-loops -funroll-loops @gol
-funsafe-loop-optimizations -funsafe-math-optimizations -funswitch-loops @gol
-fvariable-expansion-in-unroller -fvect-cost-model -fvpt -fweb @gol
@@ -6770,7 +6770,7 @@ optimizations designed to reduce code si
@option{-Os} disables the following optimization flags:
@gccoptlist{-falign-functions -falign-jumps -falign-loops @gol
-falign-labels -freorder-blocks -freorder-blocks-and-partition @gol
--fprefetch-loop-arrays -ftree-vect-loop-version}
+-fprefetch-loop-arrays}
@item -Ofast
@opindex Ofast
@@ -8025,19 +8025,20 @@ Perform loop vectorization on trees. Thi
Perform basic block vectorization on trees. This flag is enabled by default at
@option{-O3} and when @option{-ftree-vectorize} is enabled.
-@item -ftree-vect-loop-version
-@opindex ftree-vect-loop-version
-Perform loop versioning when doing loop vectorization on trees. When a loop
-appears to be vectorizable except that data alignment or data dependence cannot
-be determined at compile time, then vectorized and non-vectorized versions of
-the loop are generated along with run-time checks for alignment or dependence
-to control which version is executed. This option is enabled by default
-except at level @option{-Os} where it is disabled.
-
-@item -fvect-cost-model
+@item -fvect-cost-model=@var{model}
@opindex fvect-cost-model
-Enable cost model for vectorization. This option is enabled by default at
-@option{-O3}.
+Alter the cost model used for vectorization. The @var{model} argument
+should be one of @code{unlimited}, @code{dynamic} or @code{cheap}.
+With the @code{unlimited} model the vectorized code-path is assumed
+to be profitable, while with the @code{dynamic} model a runtime check
+guards the vectorized code-path to enable it only for iteration
+counts that will likely execute faster than the original
+scalar loop. The @code{cheap} model disables vectorization of
+loops where doing so would be cost prohibitive, for example due to
+required runtime checks for data dependence or alignment, but otherwise
+is equal to the @code{dynamic} model.
+The default cost model depends on other optimization flags and is
+either @code{dynamic} or @code{cheap}.
@item -ftree-vrp
@opindex ftree-vrp
@@ -9443,13 +9444,11 @@ constraints. The default value is 0.
@item vect-max-version-for-alignment-checks
The maximum number of run-time checks that can be performed when
-doing loop versioning for alignment in the vectorizer. See option
-@option{-ftree-vect-loop-version} for more information.
+doing loop versioning for alignment in the vectorizer.
@item vect-max-version-for-alias-checks
The maximum number of run-time checks that can be performed when
-doing loop versioning for alias in the vectorizer. See option
-@option{-ftree-vect-loop-version} for more information.
+doing loop versioning for alias in the vectorizer.
@item vect-max-peeling-for-alignment
The maximum number of loop peels to enhance access alignment
===================================================================
@@ -42782,20 +42782,17 @@ ix86_add_stmt_cost (void *data, int coun
unsigned *cost = (unsigned *) data;
unsigned retval = 0;
- if (flag_vect_cost_model)
- {
- tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
- int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+ int stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
- /* Statements in an inner loop relative to the loop being
- vectorized are weighted more heavily. The value here is
- arbitrary and could potentially be improved with analysis. */
- if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
- count *= 50; /* FIXME. */
+ /* Statements in an inner loop relative to the loop being
+ vectorized are weighted more heavily. The value here is
+ arbitrary and could potentially be improved with analysis. */
+ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+ count *= 50; /* FIXME. */
- retval = (unsigned) (count * stmt_cost);
- cost[where] += retval;
- }
+ retval = (unsigned) (count * stmt_cost);
+ cost[where] += retval;
return retval;
}