@@ -810,6 +810,14 @@ struct GTY(()) cgraph_simd_clone {
/* Original cgraph node the SIMD clones were created for. */
cgraph_node *origin;
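+ /* Target-defined value describing the device selected for a declare variant
+ clone: the bitwise OR of the TARGET_OMP_DEVICE_KIND_ARCH_ISA hook results
+ for the variant's device 'isa' and 'arch' selectors. Always 0 for
+ 'omp declare simd' clones. */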
+ unsigned device;
+
+ /* The identifier for the name of the variant in case of a declare variant
+ clone, this is NULL_TREE for declare simd clones. */
+ tree variant_name;
+
/* Annotated function arguments for the original function. */
cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1];
};
@@ -26970,15 +26970,28 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
clonei->mask_mode = VOIDmode;
elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
+ /* A simdclone without simdlen can legally originate from either a:
+ 'omp declare simd':
+ In this case generate at least 3 simd clones, one for Advanced SIMD
+ 64-bit vectors, one for Advanced SIMD 128-bit vectors and one for SVE
+ vector length agnostic vectors.
+ 'omp declare variant':
+ In this case we must be generating a simd clone for SVE vector length
+ agnostic vectors.
+ */
if (known_eq (clonei->simdlen, 0U))
{
- if (num >= 2)
+ if (clonei->device == 2 || num >= 2)
{
+ count = 1;
vec_bits = poly_uint64 (128, 128);
clonei->simdlen = exact_div (vec_bits, elt_bits);
}
else
{
+ if (clonei->device != 0)
+ return 0;
+
count = 3;
vec_bits = (num == 0 ? 64 : 128);
clonei->simdlen = exact_div (vec_bits, elt_bits);
@@ -26991,7 +27004,14 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
/* For now, SVE simdclones won't produce illegal simdlen, So only check
const simdlens here. */
if (clonei->simdlen.is_constant (&const_simdlen)
- && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U))
+ /* For Advanced SIMD we require either 64- or 128-bit vectors. */
+ && ((clonei->device < 2
+ && maybe_ne (vec_bits, 64U)
+ && maybe_ne (vec_bits, 128U))
+ /* For SVE we require multiples of 128-bits. TODO: should we check
+ for max VL? */
+ || (clonei->device == 2
+ && !constant_multiple_p (vec_bits, 128))))
{
if (explicit_p)
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
@@ -27002,7 +27022,7 @@ aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
}
}
- if (num >= 2)
+ if (clonei->device == 2 || num >= 2)
{
clonei->vecsize_mangle = 's';
clonei->inbranch = 1;
@@ -27082,22 +27102,21 @@ aarch64_simd_clone_adjust_ret_or_param (struct cgraph_node *node, tree type,
aarch64_sve_vg = poly_uint16 (2, 2);
unsigned int num_zr = 0;
unsigned int num_pr = 0;
+ tree base_type = TREE_TYPE (type);
+ if (POINTER_TYPE_P (base_type))
+ base_type = pointer_sized_int_node;
+ scalar_mode base_mode = as_a <scalar_mode> (TYPE_MODE (base_type));
+ machine_mode vec_mode = aarch64_full_sve_mode (base_mode).require ();
+ tree vectype = build_vector_type_for_mode (base_type, vec_mode);
if (is_mask)
{
- type = truth_type_for (type);
num_pr = 1;
+ type = truth_type_for (vectype);
}
else
{
num_zr = 1;
- tree base_type = TREE_TYPE (type);
- if (POINTER_TYPE_P (base_type))
- base_type = pointer_sized_int_node;
- poly_int64 vec_size = tree_to_poly_int64 (TYPE_SIZE (type));
- scalar_mode base_mode = as_a <scalar_mode> (TYPE_MODE (base_type));
- machine_mode vec_mode
- = aarch64_simd_container_mode (base_mode, vec_size);
- type = build_vector_type_for_mode (base_type, vec_mode);
+ type = vectype;
}
aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
@@ -27223,6 +27242,22 @@ aarch64_can_tag_addresses ()
return !TARGET_ILP32;
}
+/* Implement TARGET_OMP_DEVICE_KIND_ARCH_ISA.  For the isa trait, return 1
+   when NAME is "simd", 2 when NAME is "sve" and SVE is enabled, and 0 for
+   any other name.  The simd-clone code ORs these results into
+   cgraph_simd_clone::device, where 2 selects the SVE handling.  All other
+   traits are passed on to default_omp_device_kind_arch_isa.  */
+
+int
+aarch64_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
+                                  const char *name)
+{
+  if (trait != omp_device_isa)
+    return default_omp_device_kind_arch_isa (trait, name);
+
+  /* Compare whole names: a prefix match would also accept unrelated
+     spellings such as "simdfoo", or "sve2" on a target with only SVE.  */
+  if (strcmp (name, "simd") == 0)
+    return 1;
+  if (strcmp (name, "sve") == 0 && TARGET_SVE)
+    return 2;
+
+  return 0;
+}
+
/* Implement TARGET_ASM_FILE_END for AArch64. This adds the AArch64 GNU NOTE
section at the end if needed. */
#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
@@ -28146,6 +28181,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
#define TARGET_MEMTAG_CAN_TAG_ADDRESSES aarch64_can_tag_addresses
+#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
+#define TARGET_OMP_DEVICE_KIND_ARCH_ISA aarch64_omp_device_kind_arch_isa
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
@@ -7998,6 +7998,18 @@ decl_maybe_constant_destruction (tree decl, tree type)
static tree declare_simd_adjust_this (tree *, int *, void *);
+/* Callback for walk_tree, used by omp_declare_variant_finalize_one.  If *TP
+   is a FUNCTION_DECL, record its DECL_ARGUMENTS in the tree that DATA points
+   to and do not walk into its subtrees.  Always returns NULL_TREE.  */
+
+tree
+declare_variant_adjust_parm (tree *tp, int *walk_subtrees, void *data)
+{
+  if (TREE_CODE (*tp) != FUNCTION_DECL)
+    return NULL_TREE;
+
+  tree *parm_out = (tree *) data;
+  *parm_out = DECL_ARGUMENTS (*tp);
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
/* Helper function of omp_declare_variant_finalize. Finalize one
"omp declare variant base" attribute. Return true if it should be
removed. */
@@ -8015,13 +8027,14 @@ omp_declare_variant_finalize_one (tree decl, tree attr)
tree ctx = TREE_VALUE (TREE_VALUE (attr));
tree simd = omp_get_context_selector (ctx, "construct", "simd");
+ tree parm = NULL_TREE;
if (simd)
{
TREE_VALUE (simd)
= c_omp_declare_simd_clauses_to_numbers (DECL_ARGUMENTS (decl),
TREE_VALUE (simd));
- /* FIXME, adjusting simd args unimplemented. */
- return true;
+ walk_tree (&TREE_PURPOSE (TREE_VALUE (attr)), declare_variant_adjust_parm,
+ &parm, NULL);
}
tree chain = TREE_CHAIN (TREE_VALUE (attr));
@@ -8035,7 +8048,8 @@ omp_declare_variant_finalize_one (tree decl, tree attr)
input_location = varid_loc;
releasing_vec args;
- tree parm = DECL_ARGUMENTS (decl);
+ if (!parm)
+ parm = DECL_ARGUMENTS (decl);
if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE)
parm = DECL_CHAIN (parm);
for (; parm; parm = DECL_CHAIN (parm))
@@ -8096,7 +8110,9 @@ omp_declare_variant_finalize_one (tree decl, tree attr)
if (variant)
{
const char *varname = IDENTIFIER_POINTER (DECL_NAME (variant));
- if (!comptypes (TREE_TYPE (decl), TREE_TYPE (variant), 0))
+ /* TODO: When SIMD is set, should we check that the variant's return vector
+ type has an element type compatible with the declaration's return type? */
+ if (!simd && !comptypes (TREE_TYPE (decl), TREE_TYPE (variant), 0))
{
error_at (varid_loc, "variant %qD and base %qD have incompatible "
"types", variant, decl);
@@ -299,39 +299,58 @@ simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl)
(*args)[i] = TREE_TYPE ((*args)[i]);
}
+/* Helper for create_simd_clone_for_simd_or_variant.  ATTR is either NULL_TREE
+   or a TREE_LIST; its TREE_PURPOSE is stored in *FN_DECL and its TREE_VALUE
+   is walked as a chain of entries keyed by identifier:
+
+     - for a "construct" entry, every element of its value must be keyed
+       "simd"; the value of the last such element is remembered, and any
+       other key makes the function return NULL_TREE immediately;
+     - for a "device" entry, its value is stored in *DEVICE_CLAUSES.
+
+   Return the remembered "simd" value, or NULL_TREE if ATTR is NULL_TREE or
+   no "simd" element was seen.  */
+
+static tree
+find_variant_clauses (tree attr, tree *fn_decl, tree *device_clauses)
+{
+  if (attr == NULL_TREE)
+    return NULL_TREE;
+
+  gcc_assert (TREE_CODE (attr) == TREE_LIST);
+  *fn_decl = TREE_PURPOSE (attr);
+
+  tree simd_clause = NULL_TREE;
+  for (tree set = TREE_VALUE (attr); set; set = TREE_CHAIN (set))
+    {
+      tree set_id = TREE_PURPOSE (set);
+      if (set_id == maybe_get_identifier ("construct"))
+        {
+          for (tree sel = TREE_VALUE (set); sel; sel = TREE_CHAIN (sel))
+            {
+              if (TREE_PURPOSE (sel) != maybe_get_identifier ("simd"))
+                return NULL_TREE;
+              simd_clause = TREE_VALUE (sel);
+            }
+        }
+      else if (set_id == maybe_get_identifier ("device"))
+        *device_clauses = TREE_VALUE (set);
+    }
+  return simd_clause;
+}
+
/* Given a simd function in NODE, extract the simd specific
information from the OMP clauses passed in CLAUSES, and return
the struct cgraph_simd_clone * if it should be cloned. *INBRANCH_SPECIFIED
is set to TRUE if the `inbranch' or `notinbranch' clause specified,
otherwise set to FALSE. */
-static struct cgraph_simd_clone *
-simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
+static bool
+simd_clone_clauses_extract (struct cgraph_node *node ATTRIBUTE_UNUSED,
+ struct cgraph_simd_clone *clone_info,
+ auto_vec<tree> &args, tree clauses,
bool *inbranch_specified)
{
- auto_vec<tree> args;
- simd_clone_vector_of_formal_parm_types (&args, node->decl);
- tree t;
- int n;
- *inbranch_specified = false;
-
- n = args.length ();
- if (n > 0 && args.last () == void_type_node)
- n--;
-
- /* Allocate one more than needed just in case this is an in-branch
- clone which will require a mask argument. */
- struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
- clone_info->nargs = n;
-
- if (!clauses)
- goto out;
-
- clauses = TREE_VALUE (clauses);
if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
- goto out;
+ return true;
- for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
+ for (tree t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
{
switch (OMP_CLAUSE_CODE (t))
{
@@ -390,13 +409,13 @@ simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
{
warning_at (OMP_CLAUSE_LOCATION (t), 0,
"ignoring large linear step");
- return NULL;
+ return false;
}
else if (integer_zerop (step))
{
warning_at (OMP_CLAUSE_LOCATION (t), 0,
"ignoring zero linear step");
- return NULL;
+ return false;
}
else
{
@@ -453,7 +472,76 @@ simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
}
}
- out:
+ return true;
+}
+
+static struct cgraph_simd_clone *
+create_simd_clone_for_simd_or_variant (struct cgraph_node *node, tree attr,
+ bool variant, bool *inbranch_specified)
+{
+ tree fn_decl = NULL_TREE;
+ tree device_clauses = NULL_TREE;
+ *inbranch_specified = false;
+
+ tree simd_clauses;
+ if (variant)
+ simd_clauses = find_variant_clauses (attr, &fn_decl, &device_clauses);
+ else
+ {
+ /* ATTR is currently pointing to 'omp declare simd', use TREE_VALUE to
+ get the TREE_LIST with OMP_CLAUSE. */
+ simd_clauses = TREE_VALUE (attr);
+ /* If SIMD_CLAUSES is not NULL_TREE, then it should be a TREE_LIST with
+ OMP_CLAUSE inside. */
+ if (simd_clauses)
+ simd_clauses = TREE_VALUE (simd_clauses);
+ }
+ auto_vec<tree> args;
+ simd_clone_vector_of_formal_parm_types (&args, node->decl);
+
+ int n = args.length ();
+ if (n > 0 && args.last () == void_type_node)
+ n--;
+
+ /* Allocate one more than needed just in case this is an in-branch
+ clone which will require a mask argument. */
+ struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
+ clone_info->nargs = n;
+
+ if (!simd_clone_clauses_extract (node, clone_info, args,
+ simd_clauses,
+ inbranch_specified))
+ return NULL;
+
+ if (!clone_info)
+ return NULL;
+
+ clone_info->device = 0;
+ if (device_clauses)
+ {
+ while (device_clauses)
+ {
+ tree identifier = TREE_PURPOSE (device_clauses);
+ if (identifier == maybe_get_identifier ("isa"))
+ {
+ tree string_cst = TREE_VALUE (TREE_VALUE (device_clauses));
+ const char * string_cst_p = TREE_STRING_POINTER (string_cst);
+ clone_info->device
+ |= targetm.omp.device_kind_arch_isa (omp_device_isa,
+ string_cst_p);
+ }
+ else if (identifier == maybe_get_identifier ("arch"))
+ {
+ tree string_cst = TREE_VALUE (TREE_VALUE (device_clauses));
+ const char * string_cst_p = TREE_STRING_POINTER (string_cst);
+ clone_info->device
+ |= targetm.omp.device_kind_arch_isa (omp_device_arch,
+ string_cst_p);
+ }
+ device_clauses = TREE_CHAIN (device_clauses);
+ }
+ }
+
if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl))))
{
warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
@@ -473,6 +561,11 @@ simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
return NULL;
}
+ if (variant)
+ clone_info->variant_name = DECL_NAME (TREE_PURPOSE (attr));
+ else
+ clone_info->variant_name = NULL_TREE;
+
return clone_info;
}
@@ -531,6 +624,9 @@ static tree
simd_clone_mangle (struct cgraph_node *node,
struct cgraph_simd_clone *clone_info)
{
+ if (clone_info->variant_name)
+ return clone_info->variant_name;
+
char vecsize_mangle = clone_info->vecsize_mangle;
char mask = clone_info->inbranch ? 'M' : 'N';
poly_uint64 simdlen = clone_info->simdlen;
@@ -1911,21 +2007,48 @@ simd_clone_adjust (struct cgraph_node *node)
pop_cfun ();
}
+/* Look in the attribute list ATTRS for something to create SIMD clones from.
+   If it contains an 'omp declare simd' attribute, return that attribute and
+   set *VARIANT to false.  Otherwise, if it contains an
+   'omp declare variant base' attribute whose 'construct' entry has a 'simd'
+   element, return the value of that attribute and set *VARIANT to true.
+   Return NULL_TREE with *VARIANT false if neither is found.
+
+   NOTE(review): only the first 'omp declare variant base' attribute in ATTRS
+   is examined, and expand_simd_clones re-scans from TREE_CHAIN (attrs), so
+   the same attribute can be returned by several consecutive calls when other
+   attributes precede it -- confirm the caller copes with that.  */
+
+tree
+get_simd_or_variant_attrs (tree attrs, bool *variant)
+{
+  *variant = false;
+
+  tree simd_attr = lookup_attribute ("omp declare simd", attrs);
+  if (simd_attr)
+    return simd_attr;
+
+  tree variant_attr = lookup_attribute ("omp declare variant base", attrs);
+  if (!variant_attr)
+    return NULL_TREE;
+
+  /* Go through the 'omp declare variant base' and function declaration to
+     reach the list that may hold the 'construct' entry.  */
+  tree selector_sets = TREE_VALUE (TREE_VALUE (variant_attr));
+  tree construct = lookup_attribute ("construct", selector_sets);
+  if (!construct)
+    return NULL_TREE;
+
+  /* Go through 'construct'.  */
+  if (!lookup_attribute ("simd", TREE_VALUE (construct)))
+    return NULL_TREE;
+
+  *variant = true;
+  /* Return the value of the attribute that was actually matched.  ATTRS is
+     only the head of the list being searched and need not be that
+     attribute.  */
+  return TREE_VALUE (variant_attr);
+}
+
/* If the function in NODE is tagged as an elemental SIMD function,
create the appropriate SIMD clones. */
void
expand_simd_clones (struct cgraph_node *node)
{
- tree attr;
+ tree attr, attrs;
+ bool variant = false;
bool explicit_p = true;
if (node->inlined_to
|| lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
return;
- attr = lookup_attribute ("omp declare simd",
- DECL_ATTRIBUTES (node->decl));
+ attrs = DECL_ATTRIBUTES (node->decl);
+ attr = get_simd_or_variant_attrs (attrs, &variant);
+ if (variant)
+ explicit_p = false;
/* See if we can add an "omp declare simd" directive implicitly
before giving up. */
@@ -1944,8 +2067,7 @@ expand_simd_clones (struct cgraph_node *node)
&& !oacc_get_fn_attrib (node->decl)
&& ok_for_auto_simd_clone (node))
{
- attr = tree_cons (get_identifier ("omp declare simd"), NULL,
- DECL_ATTRIBUTES (node->decl));
+ attr = tree_cons (get_identifier ("omp declare simd"), NULL, attrs);
DECL_ATTRIBUTES (node->decl) = attr;
explicit_p = false;
}
@@ -1970,8 +2092,9 @@ expand_simd_clones (struct cgraph_node *node)
/* Start with parsing the "omp declare simd" attribute(s). */
bool inbranch_clause_specified;
struct cgraph_simd_clone *clone_info
- = simd_clone_clauses_extract (node, TREE_VALUE (attr),
- &inbranch_clause_specified);
+ = create_simd_clone_for_simd_or_variant (node, attr, variant,
+ &inbranch_clause_specified);
+
if (clone_info == NULL)
continue;
@@ -2070,7 +2193,8 @@ expand_simd_clones (struct cgraph_node *node)
IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)));
}
}
- while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
+ while ((attrs = TREE_CHAIN (attrs))
+ && (attr = get_simd_or_variant_attrs (attrs, &variant)));
}
/* Entry point for IPA simd clone creation pass. */
@@ -1683,7 +1683,8 @@ DEFHOOK
device trait set, return 0 if not present in any OpenMP context in the\n\
whole translation unit, or -1 if not present in the current OpenMP context\n\
but might be present in another OpenMP context in the same TU.",
-int, (enum omp_device_kind_arch_isa trait, const char *name), NULL)
+int, (enum omp_device_kind_arch_isa trait, const char *name),
+default_omp_device_kind_arch_isa)
HOOK_VECTOR_END (omp)
@@ -76,6 +76,8 @@ extern tree default_mangle_assembler_name (const char *);
extern tree default_simd_clone_adjust_ret_or_param
(struct cgraph_node *,tree , bool);
+extern int default_omp_device_kind_arch_isa
+ (omp_device_kind_arch_isa , const char *);
extern machine_mode default_translate_mode_attribute (machine_mode);
extern bool default_scalar_mode_supported_p (scalar_mode);
@@ -408,6 +408,14 @@ default_simd_clone_adjust_ret_or_param (struct cgraph_node *node ATTRIBUTE_UNUSE
return type;
}
+/* The default implementation of TARGET_OMP_DEVICE_KIND_ARCH_ISA.  Only the
+   device kind "cpu" is reported as present (1); every other kind, and every
+   arch or isa name, yields 0.  */
+
+int
+default_omp_device_kind_arch_isa (omp_device_kind_arch_isa trait,
+                                  const char *name)
+{
+  /* Match the whole name so that e.g. "cpux" is not taken for "cpu".  */
+  if (trait == omp_device_kind)
+    return strcmp (name, "cpu") == 0;
+  return 0;
+}
+
/* The default implementation of TARGET_TRANSLATE_MODE_ATTRIBUTE. */
machine_mode
new file mode 100644
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp" } */
+
+#include "declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVnN4v_callee" } } */
+/* { dg-final { scan-assembler "_ZGVnN8v_callee" } } */
new file mode 100644
@@ -0,0 +1,27 @@
+#if __ARM_FEATURE_SVE
+__SVInt16_t _ZGVsMxv_callee (__SVInt16_t, __SVBool_t);
+__SVInt16_t _ZGVsM8v_callee (__SVInt16_t, __SVBool_t);
+__SVInt16_t _ZGVsM16v_callee (__SVInt16_t, __SVBool_t);
+#endif
+__Int16x4_t _ZGVnN4v_callee (__Int16x4_t);
+__Int16x8_t _ZGVnN8v_callee (__Int16x8_t);
+#if __ARM_FEATURE_SVE
+#pragma omp declare variant(_ZGVsM16v_callee) \
+ match(construct = {simd(notinbranch, simdlen(16))}, device = {isa("sve")})
+#pragma omp declare variant(_ZGVsM8v_callee) \
+ match(construct = {simd(notinbranch, simdlen(8))}, device = {isa("sve")})
+#pragma omp declare variant(_ZGVsMxv_callee) \
+ match(construct = {simd(notinbranch)}, device = {isa("sve")})
+#endif
+#pragma omp declare variant(_ZGVnN4v_callee) \
+ match(construct = {simd(notinbranch, simdlen(4))}, device = {isa("simd")})
+#pragma omp declare variant(_ZGVnN8v_callee) \
+ match(construct = {simd(notinbranch, simdlen(8))}, device = {isa("simd")})
+extern short __attribute__ ((const)) callee (short);
+
+
+void caller_autovec (short * __restrict a, short *__restrict b, unsigned n)
+{ /* Applies callee to each of the N elements of B and stores the results
+     in A.  Presumably this is the loop the tests expect to be vectorized
+     using one of the variants declared for callee -- confirm against the
+     dg-final scan-assembler directives of the including tests.  */
+ for (unsigned i = 0; i < n; ++i)
+ a[i] = callee (b[i]);
+}
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp" } */
+
+#include "../declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVsMxv_callee" } } */
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp -msve-vector-bits=128" } */
+
+#include "../declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVsM8v_callee" } } */
new file mode 100644
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp -msve-vector-bits=256" } */
+
+#include "../declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVsM16v_callee" } } */