@@ -31,6 +31,8 @@ along with GCC; see the file COPYING3. If not see
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
+#include "insn-config.h"
+#include "recog.h" /* FIXME: for insn_data */
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
@@ -1603,6 +1605,270 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo)
vectorization_factor);
}
+/* Function vect_check_required_masks_widening.
+
+ Return true if a vector mask of type MASK_TYPE can be widened
+ to a type having REQ_ELEMS elements in a single vector.
+ E.g. a 16-element mask is widened to 4 elements in two steps
+ (16 -> 8 -> 4), using a vec_unpacks_lo/hi pair at each step. */
+
+static bool
+vect_check_required_masks_widening (loop_vec_info loop_vinfo,
+ tree mask_type, unsigned req_elems)
+{
+ unsigned mask_elems = TYPE_VECTOR_SUBPARTS (mask_type);
+
+ gcc_assert (mask_elems > req_elems);
+
+ /* Don't convert if it requires too many intermediate steps. */
+ int steps = exact_log2 (mask_elems / req_elems);
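+ /* STEPS is the total number of conversions; STEPS - 1 of them
+ produce intermediate mask types. */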
+ if (steps > MAX_INTERM_CVT_STEPS + 1)
+ return false;
+
+ /* Check we have conversion support for the given mask mode. */
+ machine_mode mode = TYPE_MODE (mask_type);
+ insn_code icode = optab_handler (vec_unpacks_lo_optab, mode);
+ if (icode == CODE_FOR_nothing
+ || optab_handler (vec_unpacks_hi_optab, mode) == CODE_FOR_nothing)
+ return false;
+
+ /* Make a recursive call for a multi-step conversion. */
+ if (steps > 1)
+ {
+ mask_elems = mask_elems >> 1;
+ mask_type = build_truth_vector_type (mask_elems, current_vector_size);
+ if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
+ return false;
+
+ if (!vect_check_required_masks_widening (loop_vinfo, mask_type,
+ req_elems))
+ return false;
+ }
+ else
+ {
+ mask_type = build_truth_vector_type (req_elems, current_vector_size);
+ if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
+ return false;
+ }
+
+ return true;
+}
+
+/* Function vect_check_required_masks_narrowing.
+
+ Return true if a vector mask of type MASK_TYPE can be narrowed
+ to a type having REQ_ELEMS elements in a single vector.
+ E.g. a 4-element mask is narrowed to 16 elements in two steps
+ (4 -> 8 -> 16), using vec_pack_trunc at each step. */
+
+static bool
+vect_check_required_masks_narrowing (loop_vec_info loop_vinfo,
+ tree mask_type, unsigned req_elems)
+{
+ unsigned mask_elems = TYPE_VECTOR_SUBPARTS (mask_type);
+
+ gcc_assert (req_elems > mask_elems);
+
+ /* Don't convert if it requires too many intermediate steps. */
+ int steps = exact_log2 (req_elems / mask_elems);
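+ /* STEPS is the total number of conversions; STEPS - 1 of them
+ produce intermediate mask types. */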
+ if (steps > MAX_INTERM_CVT_STEPS + 1)
+ return false;
+
+ /* Check we have conversion support for the given mask mode. */
+ machine_mode mode = TYPE_MODE (mask_type);
+ insn_code icode = optab_handler (vec_pack_trunc_optab, mode);
+ if (icode == CODE_FOR_nothing)
+ return false;
+
+ /* Make a recursive call for a multi-step conversion. */
+ if (steps > 1)
+ {
+ mask_elems = mask_elems << 1;
+ mask_type = build_truth_vector_type (mask_elems, current_vector_size);
+ if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
+ return false;
+
+ if (!vect_check_required_masks_narrowing (loop_vinfo, mask_type,
+ req_elems))
+ return false;
+ }
+ else
+ {
+ mask_type = build_truth_vector_type (req_elems, current_vector_size);
+ if (TYPE_MODE (mask_type) != insn_data[icode].operand[0].mode)
+ return false;
+ }
+
+ return true;
+}
+
+/* Function vect_get_masking_iv_elems.
+
+ Return the number of elements in the IV used for loop masking. */
+
+static int
+vect_get_masking_iv_elems (loop_vec_info loop_vinfo)
+{
+ tree iv_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
+ tree iv_vectype = get_vectype_for_scalar_type (iv_type);
+
+ /* We extend the IV type in case it is not big enough to
+ fill a full vector. */
+ return MIN ((int)TYPE_VECTOR_SUBPARTS (iv_vectype),
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+}
+
+/* Function vect_get_masking_iv_type.
+
+ Return the type of the IV used for loop masking. */
+
+static tree
+vect_get_masking_iv_type (loop_vec_info loop_vinfo)
+{
+ /* The masking IV is to be compared to a vector of NITERS and
+ therefore the type of NITERS is used as the base type for the IV.
+ FIXME: This can be improved by using a smaller type when possible
+ for more efficient mask computation. */
+ tree iv_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
+ tree iv_vectype = get_vectype_for_scalar_type (iv_type);
+ unsigned vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ if (TYPE_VECTOR_SUBPARTS (iv_vectype) <= vf)
+ return iv_vectype;
+
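+ /* The IV vector would have more than VF elements; use a wider
+ element type so that exactly VF elements fill a single vector. */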
+ unsigned elem_size = current_vector_size * BITS_PER_UNIT / vf;
+ iv_type = build_nonstandard_integer_type (elem_size, TYPE_UNSIGNED (iv_type));
+
+ return get_vectype_for_scalar_type (iv_type);
+}
+
+/* Function vect_get_extreme_masks.
+
+ Determine the minimum and maximum number of elements in masks
+ required for masking a loop described by LOOP_VINFO. The
+ computed values are returned in MIN_MASK_ELEMS and
+ MAX_MASK_ELEMS. */
+
+static void
+vect_get_extreme_masks (loop_vec_info loop_vinfo,
+ unsigned *min_mask_elems,
+ unsigned *max_mask_elems)
+{
+ unsigned required_masks = LOOP_VINFO_REQUIRED_MASKS (loop_vinfo);
+ unsigned elems = 1;
+
+ *min_mask_elems = *max_mask_elems = vect_get_masking_iv_elems (loop_vinfo);
+
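+ /* Bit N of REQUIRED_MASKS is set when a mask with 2^N elements
+ is required. */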
+ while (required_masks)
+ {
+ if (required_masks & 1)
+ {
+ if (elems < *min_mask_elems)
+ *min_mask_elems = elems;
+ if (elems > *max_mask_elems)
+ *max_mask_elems = elems;
+ }
+ elems = elems << 1;
+ required_masks = required_masks >> 1;
+ }
+}
+
+/* Function vect_check_required_masks.
+
+ For the given LOOP_VINFO check that all required masks can be
+ computed and add their computation cost to the loop cost data. */
+
+static void
+vect_check_required_masks (loop_vec_info loop_vinfo)
+{
+ if (!LOOP_VINFO_REQUIRED_MASKS (loop_vinfo))
+ return;
+
+ /* First check we have a proper comparison to get
+ an initial mask. */
+ tree iv_vectype = vect_get_masking_iv_type (loop_vinfo);
+ unsigned iv_elems = TYPE_VECTOR_SUBPARTS (iv_vectype);
+
+ tree mask_type = build_same_sized_truth_vector_type (iv_vectype);
+
+ if (!expand_vec_cmp_expr_p (iv_vectype, mask_type))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: required vector comparison "
+ "is not supported.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ return;
+ }
+
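+ /* The masking IV vector covers IV_ELEMS lanes, so VF / IV_ELEMS
+ copies of the comparison are needed. */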
+ int cmp_copies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / iv_elems;
+ /* Add the cost of creating the initial IV values. */
+ add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), cmp_copies,
+ scalar_to_vec, NULL, 0, vect_masking_prologue);
+ /* Add the cost of creating the upper bound and step values. It is
+ the same for all copies. */
+ add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), 2,
+ scalar_to_vec, NULL, 0, vect_masking_prologue);
+ /* Add the cost of the vector comparisons. */
+ add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), cmp_copies,
+ vector_stmt, NULL, 0, vect_masking_body);
+ /* Add the cost of the IV increments. */
+ add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo), cmp_copies,
+ vector_stmt, NULL, 0, vect_masking_body);
+
+ /* Now check the widest and the narrowest masks. All
+ intermediate mask values are obtained while computing
+ the extreme ones. */
+ unsigned min_mask_elems = 0;
+ unsigned max_mask_elems = 0;
+
+ vect_get_extreme_masks (loop_vinfo, &min_mask_elems, &max_mask_elems);
+
+ if (min_mask_elems < iv_elems)
+ {
+ /* Check mask widening is available. */
+ if (!vect_check_required_masks_widening (loop_vinfo, mask_type,
+ min_mask_elems))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: required mask widening "
+ "is not supported.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ return;
+ }
+
+ /* Add the widening cost. In total (2^N - 1) vectors have
+ to be widened for each original vector, where N is the
+ number of conversion steps. Each widening requires two
+ extracts (the lo and hi parts). */
+ int steps = exact_log2 (iv_elems / min_mask_elems);
+ int conversions = cmp_copies * 2 * ((1 << steps) - 1);
+ add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo),
+ conversions, vec_promote_demote,
+ NULL, 0, vect_masking_body);
+ }
+
+ if (max_mask_elems > iv_elems)
+ {
+ if (!vect_check_required_masks_narrowing (loop_vinfo, mask_type,
+ max_mask_elems))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: required mask narrowing "
+ "is not supported.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ return;
+ }
+
+ /* Add the narrowing cost. In total (2^N - 1) narrowings
+ are required for each resulting vector, where N is the
+ number of conversion steps. */
+ int steps = exact_log2 (max_mask_elems / iv_elems);
+ int results = cmp_copies * iv_elems / max_mask_elems;
+ int conversions = results * ((1 << steps) - 1);
+ add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo),
+ conversions, vec_promote_demote,
+ NULL, 0, vect_masking_body);
+ }
+}
+
/* Function vect_analyze_loop_operations.
Scan the loop stmts and make sure they are all vectorizable. */
@@ -1761,6 +2027,12 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
return false;
}
+ /* If all statements can be masked then we also need to
+ check that we can compute the required masks and account
+ for their computation cost. */
+ if (LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ vect_check_required_masks (loop_vinfo);
+
return true;
}
@@ -2236,6 +2508,8 @@ again:
LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = true;
+ LOOP_VINFO_NEED_MASKING (loop_vinfo) = false;
goto start_over;
}
@@ -5428,6 +5702,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
outer_loop = loop;
loop = loop->inner;
nested_cycle = true;
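+ /* Masking of loops with nested cycles is not supported. */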
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
}
/* 1. Is vectorizable reduction? */
@@ -5627,6 +5902,18 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
gcc_assert (ncopies >= 1);
+ if (slp_node || PURE_SLP_STMT (stmt_info) || code == COND_EXPR
+ || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
+ || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
+ == INTEGER_INDUC_COND_REDUCTION)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: unsupported conditional "
+ "reduction\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+
vec_mode = TYPE_MODE (vectype_in);
if (code == COND_EXPR)
@@ -5904,6 +6191,19 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
return false;
}
}
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ /* Check that masking of the reduction is supported. */
+ tree mask_vtype = build_same_sized_truth_vector_type (vectype_out);
+ if (!expand_vec_cond_expr_p (vectype_out, mask_vtype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: required vector conditional "
+ "expression is not supported.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+ }
if (!vec_stmt) /* transformation not required. */
{
@@ -5912,6 +6212,10 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
reduc_index))
return false;
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ vect_model_simple_masking_cost (stmt_info, ncopies);
+
return true;
}
@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3. If not see
#include "tree-vectorizer.h"
#include "builtins.h"
#include "internal-fn.h"
+#include "tree-ssa-loop-ivopts.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
@@ -535,6 +536,38 @@ process_use (gimple *stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
return true;
}
+/* Return true if STMT can be converted to masked form. */
+
+static bool
+can_mask_load_store (gimple *stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ tree vectype, mask_vectype;
+ tree lhs, ref;
+
+ if (!stmt_info)
+ return false;
+ lhs = gimple_assign_lhs (stmt);
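+ /* For a load the LHS is an SSA name and the memory reference is
+ the RHS; for a store the reference is the LHS itself. */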
+ ref = (TREE_CODE (lhs) == SSA_NAME) ? gimple_assign_rhs1 (stmt) : lhs;
+ if (may_be_nonaddressable_p (ref))
+ return false;
+ vectype = STMT_VINFO_VECTYPE (stmt_info);
+ mask_vectype = build_same_sized_truth_vector_type (vectype);
+ if (!can_vec_mask_load_store_p (TYPE_MODE (vectype),
+ TYPE_MODE (mask_vectype),
+ gimple_assign_load_p (stmt)))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Statement can't be masked.\n");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ }
+
+ return false;
+ }
+ return true;
+}
/* Function vect_mark_stmts_to_be_vectorized.
@@ -1193,6 +1226,56 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
}
}
+/* Function vect_model_load_masking_cost.
+
+ Model the cost of masking a memory load. */
+
+void
+vect_model_load_masking_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ /* MASK_LOAD case. */
+ if (gimple_code (stmt_info->stmt) == GIMPLE_CALL)
+ add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
+ ncopies, vector_mask_load, stmt_info, false,
+ vect_masking_body);
+ /* Other loads. */
+ else
+ add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
+ ncopies, vector_load, stmt_info, false,
+ vect_masking_body);
+}
+
+/* Function vect_model_store_masking_cost.
+
+ Model the cost of masking a memory store. */
+
+void
+vect_model_store_masking_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ /* MASK_STORE case. */
+ if (gimple_code (stmt_info->stmt) == GIMPLE_CALL)
+ add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
+ ncopies, vector_mask_store, stmt_info, false,
+ vect_masking_body);
+ /* Other stores. */
+ else
+ add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
+ ncopies, vector_store, stmt_info, false,
+ vect_masking_body);
+}
+
+/* Function vect_model_simple_masking_cost.
+
+ Model the cost of masking a simple statement. */
+
+void
+vect_model_simple_masking_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ add_stmt_masking_cost (stmt_info->vinfo->target_cost_data,
+ ncopies, vector_stmt, stmt_info, false,
+ vect_masking_body);
+}
+
/* Insert the new stmt NEW_STMT at *GSI or at the appropriate place in
the loop preheader for the vectorized stmt STMT. */
@@ -1798,6 +1881,20 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
&& !useless_type_conversion_p (vectype, rhs_vectype)))
return false;
+ if (LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ /* Check that mask conjunction is supported. */
+ optab tab;
+ tab = optab_for_tree_code (BIT_AND_EXPR, vectype, optab_default);
+ if (!tab || optab_handler (tab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: unsupported mask operation\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
@@ -1806,6 +1903,15 @@ vectorizable_mask_load_store (gimple *stmt, gimple_stmt_iterator *gsi,
NULL, NULL, NULL);
else
vect_model_load_cost (stmt_info, ncopies, false, NULL, NULL, NULL);
+
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ if (is_store)
+ vect_model_store_masking_cost (stmt_info, ncopies);
+ else
+ vect_model_load_masking_cost (stmt_info, ncopies);
+ }
+
return true;
}
@@ -2802,6 +2908,18 @@ vectorizable_simd_clone_call (gimple *stmt, gimple_stmt_iterator *gsi,
if (slp_node)
return false;
+ /* Masked clones are not yet supported, but we allow
+ const calls which can simply be executed with no mask. */
+ if (!(gimple_call_flags (stmt) & ECF_CONST)
+ || (gimple_call_flags (stmt) & ECF_LOOPING_CONST_OR_PURE))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: non-const call "
+ "(masked calls are not supported)\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+
/* Process function arguments. */
nargs = gimple_call_num_args (stmt);
@@ -5340,6 +5458,14 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
"negative step and reversing not supported.\n");
return false;
}
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: negative step"
+ " is not supported.");
+ }
}
}
@@ -5348,6 +5474,16 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
grouped_store = true;
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
+
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: grouped access"
+ " is not supported." );
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+
if (!slp && !STMT_VINFO_STRIDED_P (stmt_info))
{
if (vect_store_lanes_supported (vectype, group_size))
@@ -5401,6 +5537,44 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
"scatter index use not simple.");
return false;
}
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: gather/scatter is"
+ " not supported.");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+ }
+
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
+ && STMT_VINFO_STRIDED_P (stmt_info))
+ {
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: strided store is not"
+ " supported.\n");
+ }
+
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
+ && integer_zerop (nested_in_vect_loop_p (loop, stmt)
+ ? STMT_VINFO_DR_STEP (stmt_info)
+ : DR_STEP (dr)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: invariant store.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
+ && !can_mask_load_store (stmt))
+ {
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: unsupported mask store.\n");
}
if (!vec_stmt) /* transformation not required. */
@@ -5410,6 +5584,9 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (!PURE_SLP_STMT (stmt_info))
vect_model_store_cost (stmt_info, ncopies, store_lanes_p, dt,
NULL, NULL, NULL);
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ vect_model_store_masking_cost (stmt_info, ncopies);
+
return true;
}
@@ -6315,6 +6492,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
grouped_load = true;
/* FORNOW */
gcc_assert (!nested_in_vect_loop && !STMT_VINFO_GATHER_SCATTER_P (stmt_info));
+ /* Masking of grouped loads is not yet supported. */
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: grouped access is not"
+ " supported.");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));
@@ -6368,6 +6554,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
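+ /* Masking cannot be combined with peeling for gaps. */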
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
}
if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
@@ -6421,6 +6608,16 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
gather_decl = vect_check_gather_scatter (stmt, loop_vinfo, &gather_base,
&gather_off, &gather_scale);
gcc_assert (gather_decl);
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: gather/scatter is not"
+ " supported.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+
if (!vect_is_simple_use (gather_off, vinfo, &def_stmt, &gather_dt,
&gather_off_vectype))
{
@@ -6432,6 +6629,15 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
}
else if (STMT_VINFO_STRIDED_P (stmt_info))
{
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: strided load is not"
+ " supported.\n");
+ }
+
if (grouped_load
&& slp
&& (group_size > nunits
@@ -6483,9 +6689,35 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
"\n");
return false;
}
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: negative step "
+ "for masking.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
}
}
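+ /* An invariant load reads the same location on every iteration,
+ so it does not need to be masked. */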
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
+ && integer_zerop (nested_in_vect_loop
+ ? STMT_VINFO_DR_STEP (stmt_info)
+ : DR_STEP (dr)))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "allow invariant load for masked loop.\n");
+ }
+ else if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo)
+ && !can_mask_load_store (stmt))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: unsupported masked load.\n");
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
@@ -6493,6 +6725,9 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
if (!PURE_SLP_STMT (stmt_info))
vect_model_load_cost (stmt_info, ncopies, load_lanes_p,
NULL, NULL, NULL);
+ if (loop_vinfo && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ vect_model_load_masking_cost (stmt_info, ncopies);
+
return true;
}
@@ -7889,6 +8124,43 @@ vectorizable_comparison (gimple *stmt, gimple_stmt_iterator *gsi,
return true;
}
+/* Return true if the vector version of STMT should be masked
+ in a vectorized loop epilogue (assuming the same VF as for
+ the main loop). */
+
+static bool
+vect_stmt_should_be_masked_for_epilogue (gimple *stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+ /* We should mask all statements accessing memory. */
+ if (STMT_VINFO_DATA_REF (stmt_info))
+ return true;
+
+ /* We should also mask all reductions. */
+ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
+ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
+ return true;
+
+ return false;
+}
+
+/* Record the mask required to mask STMT in LOOP_VINFO_REQUIRED_MASKS. */
+
+static void
+vect_add_required_mask_for_stmt (gimple *stmt)
+{
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ unsigned HOST_WIDE_INT nelems = TYPE_VECTOR_SUBPARTS (vectype);
+ int bit_no = exact_log2 (nelems);
+
+ gcc_assert (bit_no >= 0);
+
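+ /* Bit N set in REQUIRED_MASKS means a mask with 2^N elements
+ is required. */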
+ LOOP_VINFO_REQUIRED_MASKS (loop_vinfo) |= (1 << bit_no);
+}
+
/* Make sure the statement is vectorizable. */
bool
@@ -7896,6 +8168,7 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
+ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);
bool ok;
tree scalar_type, vectype;
@@ -8062,6 +8335,10 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
STMT_VINFO_VECTYPE (stmt_info) = vectype;
}
+ /* Masking is not supported for SLP yet. */
+ if (loop_vinfo && node)
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+
if (STMT_VINFO_RELEVANT_P (stmt_info))
{
gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
@@ -8121,6 +8398,26 @@ vect_analyze_stmt (gimple *stmt, bool *need_to_vectorize, slp_tree node)
return false;
}
+ if (loop_vinfo
+ && LOOP_VINFO_CAN_BE_MASKED (loop_vinfo))
+ {
+ /* Currently we have real masking only for loads and stores.
+ We can't mask a loop which has any other statement that
+ may trap. */
+ if (gimple_could_trap_p_1 (stmt, false, false))
+ {
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "cannot be masked: unsupported trapping stmt: ");
+ dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
+ }
+ LOOP_VINFO_CAN_BE_MASKED (loop_vinfo) = false;
+ }
+ else if (vect_stmt_should_be_masked_for_epilogue (stmt))
+ vect_add_required_mask_for_stmt (stmt);
+ }
+
if (bb_vinfo)
return true;
@@ -1033,6 +1033,9 @@ extern void vect_model_store_cost (stmt_vec_info, int, bool,
extern void vect_model_load_cost (stmt_vec_info, int, bool, slp_tree,
stmt_vector_for_cost *,
stmt_vector_for_cost *);
+extern void vect_model_load_masking_cost (stmt_vec_info, int);
+extern void vect_model_store_masking_cost (stmt_vec_info, int);
+extern void vect_model_simple_masking_cost (stmt_vec_info, int);
extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
enum vect_cost_for_stmt, stmt_vec_info,
int, enum vect_cost_model_location);