@@ -995,6 +995,8 @@ namespace aarch64_sve {
#ifdef GCC_TARGET_H
bool verify_type_context (location_t, type_context_kind, const_tree, bool);
#endif
+ void add_sve_type_attribute (tree, unsigned int, unsigned int,
+ const char *, const char *);
}
extern void aarch64_split_combinev16qi (rtx operands[3]);
@@ -569,14 +569,16 @@ static bool reported_missing_registers_p;
/* Record that TYPE is an ABI-defined SVE type that contains NUM_ZR SVE vectors
and NUM_PR SVE predicates. MANGLED_NAME, if nonnull, is the ABI-defined
- mangling of the type. ACLE_NAME is the <arm_sve.h> name of the type. */
+ mangling of the type. ACLE_NAME, if nonnull, is its <arm_sve.h> name. */
-static void
+void
add_sve_type_attribute (tree type, unsigned int num_zr, unsigned int num_pr,
const char *mangled_name, const char *acle_name)
{
tree mangled_name_tree
= (mangled_name ? get_identifier (mangled_name) : NULL_TREE);
+ tree acle_name_tree
+ = (acle_name ? get_identifier (acle_name) : NULL_TREE);
- tree value = tree_cons (NULL_TREE, get_identifier (acle_name), NULL_TREE);
+ tree value = tree_cons (NULL_TREE, acle_name_tree, NULL_TREE);
value = tree_cons (NULL_TREE, mangled_name_tree, value);
value = tree_cons (NULL_TREE, size_int (num_pr), value);
value = tree_cons (NULL_TREE, size_int (num_zr), value);
@@ -4015,13 +4015,13 @@ aarch64_takes_arguments_in_sve_regs_p (const_tree fntype)
static const predefined_function_abi &
aarch64_fntype_abi (const_tree fntype)
{
- if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)))
- return aarch64_simd_abi ();
-
if (aarch64_returns_value_in_sve_regs_p (fntype)
|| aarch64_takes_arguments_in_sve_regs_p (fntype))
return aarch64_sve_abi ();
+ if (lookup_attribute ("aarch64_vector_pcs", TYPE_ATTRIBUTES (fntype)))
+ return aarch64_simd_abi ();
+
return default_function_abi;
}
@@ -26968,14 +26968,21 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
}
}
- clonei->vecsize_mangle = 'n';
clonei->mask_mode = VOIDmode;
elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
if (known_eq (clonei->simdlen, 0U))
{
- count = 2;
- vec_bits = (num == 0 ? 64 : 128);
- clonei->simdlen = exact_div (vec_bits, elt_bits);
+ if (num >= 2)
+ {
+ vec_bits = poly_uint64 (128, 128);
+ clonei->simdlen = exact_div (vec_bits, elt_bits);
+ }
+ else
+ {
+ count = 3;
+ vec_bits = (num == 0 ? 64 : 128);
+ clonei->simdlen = exact_div (vec_bits, elt_bits);
+ }
}
else
{
@@ -26994,6 +27001,15 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
return 0;
}
}
+
+ if (num >= 2)
+ {
+ clonei->vecsize_mangle = 's';
+ clonei->inbranch = 1;
+ }
+ else
+ clonei->vecsize_mangle = 'n';
+
clonei->vecsize_int = vec_bits;
clonei->vecsize_float = vec_bits;
return count;
@@ -27010,17 +27026,28 @@ aarch64_simd_clone_adjust (struct cgraph_node *node)
tree t = TREE_TYPE (node->decl);
TYPE_ATTRIBUTES (t) = make_attribute ("aarch64_vector_pcs", "default",
TYPE_ATTRIBUTES (t));
+ if (node->simdclone->vecsize_mangle == 's')
+ {
+ tree target = build_string (strlen ("+sve"), "+sve");
+ aarch64_option_valid_attribute_p (node->decl, NULL_TREE, target, 0);
+ }
}
/* Implement TARGET_SIMD_CLONE_USABLE. */
static int
-aarch64_simd_clone_usable (struct cgraph_node *node)
+aarch64_simd_clone_usable (struct cgraph_node *node, machine_mode vector_mode)
{
switch (node->simdclone->vecsize_mangle)
{
case 'n':
- if (!TARGET_SIMD)
+ if (!TARGET_SIMD
+ || aarch64_sve_mode_p (vector_mode))
+ return -1;
+ return 0;
+ case 's':
+ if (!TARGET_SVE
+ || !aarch64_sve_mode_p (vector_mode))
return -1;
return 0;
default:
@@ -27028,6 +27055,61 @@ aarch64_simd_clone_usable (struct cgraph_node *node)
}
}
+/* Implement TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM.
+
+   Return the type that SIMD clone NODE should use in place of the return
+   or parameter type TYPE.  TYPE is returned unchanged unless it is a
+   nonnull vector type and NODE's vecsize_mangle is 's'.
+
+   For an 's' clone: if IS_MASK is true, TYPE is replaced by
+   truth_type_for (TYPE) and recorded as one SVE predicate.  Otherwise the
+   type is rebuilt from TYPE's element type (pointer elements become
+   pointer_sized_int_node) in the mode that aarch64_simd_container_mode
+   returns for that element mode and TYPE's TYPE_SIZE, and is recorded as
+   one SVE vector.  The result is tagged with add_sve_type_attribute,
+   passing NULL for both the mangled and the ACLE name.
+
+   While the type is built, the global options are switched to NODE's
+   DECL_FUNCTION_SPECIFIC_TARGET, have_regs_of_mode is set to true for
+   every mode accepted by aarch64_sve_mode_p and, when the clone's simdlen
+   is not constant, aarch64_sve_vg is set to poly_uint16 (2, 2).  All three
+   pieces of state are restored before returning.
+   NOTE(review): presumably this allows SVE types to be created while
+   compiling code that does not itself enable SVE -- confirm.  */
+
+static tree
+aarch64_simd_clone_adjust_ret_or_param (struct cgraph_node *node, tree type,
+ bool is_mask)
+{
+ if (type
+ && VECTOR_TYPE_P (type)
+ && node->simdclone->vecsize_mangle == 's')
+ {
+ cl_target_option cur_target;
+ cl_target_option_save (&cur_target, &global_options, &global_options_set);
+ tree new_target = DECL_FUNCTION_SPECIFIC_TARGET (node->decl);
+ cl_target_option_restore (&global_options, &global_options_set,
+ TREE_TARGET_OPTION (new_target));
+ aarch64_override_options_internal (&global_options);
+ bool m_old_have_regs_of_mode[MAX_MACHINE_MODE];
+ memcpy (m_old_have_regs_of_mode, have_regs_of_mode,
+ sizeof (have_regs_of_mode));
+ for (int i = 0; i < NUM_MACHINE_MODES; ++i)
+ if (aarch64_sve_mode_p ((machine_mode) i))
+ have_regs_of_mode[i] = true;
+ poly_uint16 old_sve_vg = aarch64_sve_vg;
+ if (!node->simdclone->simdlen.is_constant ())
+ aarch64_sve_vg = poly_uint16 (2, 2);
+ unsigned int num_zr = 0;
+ unsigned int num_pr = 0;
+ if (is_mask)
+ {
+ type = truth_type_for (type);
+ num_pr = 1;
+ }
+ else
+ {
+ num_zr = 1;
+ tree base_type = TREE_TYPE (type);
+ if (POINTER_TYPE_P (base_type))
+ base_type = pointer_sized_int_node;
+ poly_int64 vec_size = tree_to_poly_int64 (TYPE_SIZE (type));
+ scalar_mode base_mode = as_a <scalar_mode> (TYPE_MODE (base_type));
+ machine_mode vec_mode
+ = aarch64_simd_container_mode (base_mode, vec_size);
+ type = build_vector_type_for_mode (base_type, vec_mode);
+ }
+
+ aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
+ cl_target_option_restore (&global_options, &global_options_set, &cur_target);
+ aarch64_override_options_internal (&global_options);
+ memcpy (have_regs_of_mode, m_old_have_regs_of_mode,
+ sizeof (have_regs_of_mode));
+ aarch64_sve_vg = old_sve_vg;
+ }
+ return type;
+}
+
/* Implement TARGET_COMP_TYPE_ATTRIBUTES */
static int
@@ -28048,6 +28130,10 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_SIMD_CLONE_USABLE
#define TARGET_SIMD_CLONE_USABLE aarch64_simd_clone_usable
+#undef TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM
+#define TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM \
+ aarch64_simd_clone_adjust_ret_or_param
+
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
@@ -6306,11 +6306,16 @@ This hook should add implicit @code{attribute(target("..."))} attribute
to SIMD clone @var{node} if needed.
@end deftypefn
-@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{})
+@deftypefn {Target Hook} int TARGET_SIMD_CLONE_USABLE (struct cgraph_node *@var{}, @var{machine_mode})
This hook should return -1 if SIMD clone @var{node} shouldn't be used
-in vectorized loops in current function, or non-negative number if it is
-usable. In that case, the smaller the number is, the more desirable it is
-to use it.
+in vectorized loops being vectorized with mode @var{m} in current function, or
+non-negative number if it is usable. In that case, the smaller the number is,
+the more desirable it is to use it.
+@end deftypefn
+
+@deftypefn {Target Hook} tree TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM (struct cgraph_node *@var{}, @var{tree}, @var{bool})
+If defined, this hook should adjust the type of the return or parameter
+@var{type} to be used by the simd clone @var{node}.
@end deftypefn
@deftypefn {Target Hook} int TARGET_SIMT_VF (void)
@@ -4205,6 +4205,8 @@ address; but often a machine-dependent strategy can generate better code.
@hook TARGET_SIMD_CLONE_USABLE
+@hook TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM
+
@hook TARGET_SIMT_VF
@hook TARGET_OMP_DEVICE_KIND_ARCH_ISA
@@ -378,8 +378,9 @@ simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
arg_type = SIMD_CLONE_ARG_TYPE_LINEAR_VARIABLE_STEP;
clone_info->args[argno].arg_type = arg_type;
clone_info->args[argno].linear_step = tree_to_shwi (step);
+ int nargs = clone_info->nargs;
gcc_assert (clone_info->args[argno].linear_step >= 0
- && clone_info->args[argno].linear_step < n);
+ && clone_info->args[argno].linear_step < nargs);
}
else
{
@@ -541,9 +542,12 @@ simd_clone_mangle (struct cgraph_node *node,
pp_string (&pp, "_ZGV");
pp_character (&pp, vecsize_mangle);
pp_character (&pp, mask);
- /* For now, simdlen is always constant, while variable simdlen pp 'n'. */
- unsigned int len = simdlen.to_constant ();
- pp_decimal_int (&pp, (len));
+
+ unsigned long long len = 0;
+ if (simdlen.is_constant (&len))
+ pp_decimal_int (&pp, (int) (len));
+ else
+ pp_character (&pp, 'x');
for (n = 0; n < clone_info->nargs; ++n)
{
@@ -736,6 +740,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node)
t = build_array_type_nelts (t, exact_div (node->simdclone->simdlen,
veclen));
}
+ t = targetm.simd_clone.adjust_ret_or_param (node, t, false);
TREE_TYPE (TREE_TYPE (fndecl)) = t;
if (!node->definition)
return NULL_TREE;
@@ -748,6 +753,7 @@ simd_clone_adjust_return_type (struct cgraph_node *node)
tree atype = build_array_type_nelts (orig_rettype,
node->simdclone->simdlen);
+ atype = targetm.simd_clone.adjust_ret_or_param (node, atype, false);
if (maybe_ne (veclen, node->simdclone->simdlen))
return build1 (VIEW_CONVERT_EXPR, atype, t);
@@ -807,8 +813,14 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
{
ipa_adjusted_param adj;
memset (&adj, 0, sizeof (adj));
- tree parm = args[i];
- tree parm_type = node->definition ? TREE_TYPE (parm) : parm;
+ tree parm = NULL_TREE;
+ tree parm_type = NULL_TREE;
+ if(i < args.length())
+ {
+ parm = args[i];
+ parm_type = node->definition ? TREE_TYPE (parm) : parm;
+ }
+
adj.base_index = i;
adj.prev_clone_index = i;
@@ -874,6 +886,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
? IDENTIFIER_POINTER (DECL_NAME (parm))
: NULL, parm_type, sc->simdlen);
}
+ adj.type = targetm.simd_clone.adjust_ret_or_param (node, adj.type,
+ false);
vec_safe_push (new_params, adj);
}
@@ -906,6 +920,8 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
adj.type = build_vector_type (pointer_sized_int_node, veclen);
else
adj.type = build_vector_type (base_type, veclen);
+ adj.type = targetm.simd_clone.adjust_ret_or_param (node, adj.type,
+ true);
vec_safe_push (new_params, adj);
k = vector_unroll_factor (sc->simdlen, veclen);
@@ -931,6 +947,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node)
sc->args[i].simd_array = NULL_TREE;
}
sc->args[i].orig_type = base_type;
+ sc->args[i].vector_type = adj.type;
sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK;
}
@@ -1485,8 +1502,8 @@ simd_clone_adjust (struct cgraph_node *node)
below). */
loop = alloc_loop ();
cfun->has_force_vectorize_loops = true;
- /* For now, simlen is always constant. */
- loop->safelen = node->simdclone->simdlen.to_constant ();
+ /* We can assert that safelen is the 'minimum' simdlen. */
+ loop->safelen = constant_lower_bound (node->simdclone->simdlen);
loop->force_vectorize = true;
loop->header = body_bb;
}
@@ -1546,7 +1563,7 @@ simd_clone_adjust (struct cgraph_node *node)
mask = gimple_assign_lhs (g);
g = gimple_build_assign (make_ssa_name (TREE_TYPE (mask)),
BIT_AND_EXPR, mask,
- build_int_cst (TREE_TYPE (mask), 1));
+ build_one_cst (TREE_TYPE (mask)));
gsi_insert_after (&gsi, g, GSI_CONTINUE_LINKING);
mask = gimple_assign_lhs (g);
}
@@ -1645,10 +1645,18 @@ void, (struct cgraph_node *), NULL)
DEFHOOK
(usable,
"This hook should return -1 if SIMD clone @var{node} shouldn't be used\n\
-in vectorized loops in current function, or non-negative number if it is\n\
-usable. In that case, the smaller the number is, the more desirable it is\n\
-to use it.",
-int, (struct cgraph_node *), NULL)
+in vectorized loops being vectorized with mode @var{m} in current function, or\n\
+non-negative number if it is usable. In that case, the smaller the number is,\n\
+the more desirable it is to use it.",
+int, (struct cgraph_node *, machine_mode), NULL)
+
+DEFHOOK
+(adjust_ret_or_param,
+"If defined, this hook should adjust the type of the return or parameter\n\
+@var{type} to be used by the simd clone @var{node}.",
+tree, (struct cgraph_node *, tree, bool),
+default_simd_clone_adjust_ret_or_param)
+
HOOK_VECTOR_END (simd_clone)
@@ -73,6 +73,9 @@ extern void default_print_operand (FILE *, rtx, int);
extern void default_print_operand_address (FILE *, machine_mode, rtx);
extern bool default_print_operand_punct_valid_p (unsigned char);
extern tree default_mangle_assembler_name (const char *);
+extern tree default_simd_clone_adjust_ret_or_param
+  (struct cgraph_node *, tree, bool);
+
extern machine_mode default_translate_mode_attribute (machine_mode);
extern bool default_scalar_mode_supported_p (scalar_mode);
@@ -398,6 +398,16 @@ default_mangle_assembler_name (const char *name ATTRIBUTE_UNUSED)
return get_identifier (stripped);
}
+/* The default implementation of TARGET_SIMD_CLONE_ADJUST_RET_OR_PARAM.
+   Return TYPE, the return or parameter type proposed for SIMD clone NODE,
+   unchanged.  IS_MASK is true when TYPE is the type of the clone's mask
+   argument; the default implementation ignores it.  */
+
+tree
+default_simd_clone_adjust_ret_or_param
+  (struct cgraph_node *node ATTRIBUTE_UNUSED, tree type,
+   bool is_mask ATTRIBUTE_UNUSED)
+{
+  return type;
+}
+
/* The default implementation of TARGET_TRANSLATE_MODE_ATTRIBUTE. */
machine_mode
@@ -2759,7 +2759,8 @@ vect_build_all_ones_mask (vec_info *vinfo,
{
if (TREE_CODE (masktype) == INTEGER_TYPE)
return build_int_cst (masktype, -1);
- else if (TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
+ else if (VECTOR_BOOLEAN_TYPE_P (masktype)
+ || TREE_CODE (TREE_TYPE (masktype)) == INTEGER_TYPE)
{
tree mask = build_int_cst (TREE_TYPE (masktype), -1);
mask = build_vector_from_val (masktype, mask);
@@ -4136,14 +4137,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
}
poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- if (!vf.is_constant ())
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "not considering SIMD clones; not yet supported"
- " for variable-width vectors.\n");
- return false;
- }
unsigned int badness = 0;
struct cgraph_node *bestn = NULL;
@@ -4156,20 +4149,17 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
unsigned int this_badness = 0;
unsigned int num_calls;
if (!constant_multiple_p (vf, n->simdclone->simdlen, &num_calls)
- || n->simdclone->nargs != nargs)
+ || n->simdclone->nargs != (nargs + n->simdclone->inbranch))
continue;
if (num_calls != 1)
this_badness += exact_log2 (num_calls) * 4096;
if (n->simdclone->inbranch)
this_badness += 8192;
- int target_badness = targetm.simd_clone.usable (n);
+ int target_badness = targetm.simd_clone.usable (n, vinfo->vector_mode);
if (target_badness < 0)
continue;
this_badness += target_badness * 512;
- /* FORNOW: Have to add code to add the mask argument. */
- if (n->simdclone->inbranch)
- continue;
- for (i = 0; i < nargs; i++)
+ for (i = 0; i < n->simdclone->nargs; i++)
{
switch (n->simdclone->args[i].arg_type)
{
@@ -4206,16 +4196,22 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
i = -1;
break;
case SIMD_CLONE_ARG_TYPE_MASK:
- gcc_unreachable ();
+ /* Penalize using a predicated SIMD clone in a non-masked loop,
+ as we'd have to needlessly construct an all-true mask. */
+ if (!loop_vinfo || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ this_badness += 64;
+ break;
}
if (i == (size_t) -1)
break;
- if (n->simdclone->args[i].alignment > arginfo[i].align)
+ if (i < nargs
+ && n->simdclone->args[i].alignment > arginfo[i].align)
{
i = -1;
break;
}
- if (arginfo[i].align)
+ if (i < nargs
+ && arginfo[i].align)
this_badness += (exact_log2 (arginfo[i].align)
- exact_log2 (n->simdclone->args[i].alignment));
}
@@ -4248,6 +4244,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
fndecl = bestn->decl;
nunits = bestn->simdclone->simdlen;
ncopies = vector_unroll_factor (vf, nunits);
+ nargs = bestn->simdclone->nargs;
/* If the function isn't const, only allow it in simd loops where user
has asserted that at least nunits consecutive iterations can be
@@ -4331,11 +4328,45 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
for (i = 0; i < nargs; i++)
{
- unsigned int k, l, m, o;
+ unsigned long long k, l, m, o;
tree atype;
- op = gimple_call_arg (stmt, i);
+ if (i < gimple_call_num_args (stmt))
+ op = gimple_call_arg (stmt, i);
+ else
+ op = NULL_TREE;
+
switch (bestn->simdclone->args[i].arg_type)
{
+ case SIMD_CLONE_ARG_TYPE_MASK:
+ {
+ tree mask;
+ atype = bestn->simdclone->args[i].vector_type;
+ if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
+ {
+ vec_loop_masks *loop_masks
+ = &LOOP_VINFO_MASKS (loop_vinfo);
+ mask = vect_get_loop_mask (gsi, loop_masks, ncopies,
+ vectype, j);
+ }
+ else
+ {
+ tree mask_type = bestn->simdclone->args[i].vector_type;
+ mask
+ = vect_build_all_ones_mask (vinfo, stmt_info,
+ mask_type);
+ }
+ if (!useless_type_conversion_p (TREE_TYPE (mask), atype))
+ {
+ mask = build1 (VIEW_CONVERT_EXPR, atype, mask);
+ gassign *new_stmt
+ = gimple_build_assign (make_ssa_name (atype), mask);
+ vect_finish_stmt_generation (vinfo, stmt_info,
+ new_stmt, gsi);
+ mask = gimple_assign_lhs (new_stmt);
+ }
+ vargs.safe_push (mask);
+ }
+ break;
case SIMD_CLONE_ARG_TYPE_VECTOR:
atype = bestn->simdclone->args[i].vector_type;
o = vector_unroll_factor (nunits,