diff mbox series

[RFC,5/X] omp: Create simd clones from 'omp declare variant's

Message ID ce294c68-cfb4-9716-f939-7bbf0e9a6205@arm.com
State New
Headers show
Series [RFC,5/X] omp: Create simd clones from 'omp declare variant's | expand

Commit Message

Andre Vieira (lists) March 8, 2023, 4:26 p.m. UTC
Hi,

This RFC extends the omp-simd-clone pass to create simd clones for
functions with 'omp declare variant' pragmas that contain simd
constructs. This patch also implements AArch64 support for this functionality.
This requires two extra pieces of information to be kept for each
simd-clone: a 'variant_name', since each variant has to be named upon
declaration, and a 'device', since an omp variant can have device
clauses that 'select' the device the variant is meant to be used
with. For the latter I decided to currently implement it as an
unsigned integer, to keep a 'code' per device which is target dependent.
We may want to expand this in the future to contain a target dependent
'target selector' of sorts. This would enable the implementation of the
'arch' device clause described in the BETA ABI, which can be found in the
vfabia64 subdir of https://github.com/ARM-software/abi-aa/. This patch
only implements support for the two 'isa' device clauses isa("simd") and
isa("sve").

I'll create a ChangeLog when I turn this into a PATCH if we agree on 
this direction.
diff mbox series

Patch

diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index b5fc739f1b0602a871040292a5bb1d69a9ef305f..ae1af65a9b5913ec435e783223e79767ddd68341 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -810,6 +810,14 @@  struct GTY(()) cgraph_simd_clone {
   /* Original cgraph node the SIMD clones were created for.  */
   cgraph_node *origin;
 
+  /* This is a flag to indicate what device was selected for the variant
+     clone.  Always 0 for 'omp declare simd' clones.  */
+  unsigned device;
+
+  /* The identifier for the name of the variant in case of a declare variant
+     clone, this is NULL_TREE for declare simd clones.  */
+  tree variant_name;
+
   /* Annotated function arguments for the original function.  */
   cgraph_simd_clone_arg GTY((length ("%h.nargs"))) args[1];
 };
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index ef93a4e9d43799df4410f152cdd798db285e8897..344c6001fdd646a31326f5deb8ff94873d346ed1 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -26970,15 +26970,28 @@  aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
 
   clonei->mask_mode = VOIDmode;
   elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
+  /* A simdclone without simdlen can legally originate from either a:
+     'omp declare simd':
+	In this case generate at least 3 simd clones, one for Advanced SIMD
+	64-bit vectors, one for Advanced SIMD 128-bit vectors and one for SVE
+	vector length agnostic vectors.
+      'omp declare variant':
+	In this case we must be generating a simd clone for SVE vector length
+	agnostic vectors.
+   */
   if (known_eq (clonei->simdlen, 0U))
     {
-      if (num >= 2)
+      if (clonei->device == 2 || num >= 2)
 	{
+	  count = 1;
 	  vec_bits = poly_uint64 (128, 128);
 	  clonei->simdlen = exact_div (vec_bits, elt_bits);
 	}
       else
 	{
+	  if (clonei->device != 0)
+	    return 0;
+
 	  count = 3;
 	  vec_bits = (num == 0 ? 64 : 128);
 	  clonei->simdlen = exact_div (vec_bits, elt_bits);
@@ -26991,7 +27004,14 @@  aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
       /* For now, SVE simdclones won't produce illegal simdlen, So only check
 	 const simdlens here.  */
       if (clonei->simdlen.is_constant (&const_simdlen)
-	  && maybe_ne (vec_bits, 64U) && maybe_ne (vec_bits, 128U))
+	  /* For Advanced SIMD we require either 64- or 128-bit vectors.  */
+	  && ((clonei->device < 2
+	       && maybe_ne (vec_bits, 64U)
+	       && maybe_ne (vec_bits, 128U))
+	  /* For SVE we require multiples of 128-bits.  TODO: should we check
+	     for max VL?  */
+	      || (clonei->device == 2
+		  && !constant_multiple_p (vec_bits, 128))))
 	{
 	  if (explicit_p)
 	    warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
@@ -27002,7 +27022,7 @@  aarch64_simd_clone_compute_vecsize_and_simdlen (struct cgraph_node *node,
 	}
     }
 
-  if (num >= 2)
+  if (clonei->device == 2 || num >= 2)
     {
       clonei->vecsize_mangle = 's';
       clonei->inbranch = 1;
@@ -27082,22 +27102,21 @@  aarch64_simd_clone_adjust_ret_or_param (struct cgraph_node *node, tree type,
 	aarch64_sve_vg = poly_uint16 (2, 2);
       unsigned int num_zr = 0;
       unsigned int num_pr = 0;
+      tree base_type = TREE_TYPE (type);
+      if (POINTER_TYPE_P (base_type))
+	base_type = pointer_sized_int_node;
+      scalar_mode base_mode = as_a <scalar_mode> (TYPE_MODE (base_type));
+      machine_mode vec_mode = aarch64_full_sve_mode (base_mode).require ();
+      tree vectype = build_vector_type_for_mode (base_type, vec_mode);
       if (is_mask)
 	{
-	  type = truth_type_for (type);
 	  num_pr = 1;
+	  type = truth_type_for (vectype);
 	}
       else
 	{
 	  num_zr = 1;
-	  tree base_type = TREE_TYPE (type);
-	  if (POINTER_TYPE_P (base_type))
-	    base_type = pointer_sized_int_node;
-	  poly_int64 vec_size = tree_to_poly_int64 (TYPE_SIZE (type));
-	  scalar_mode base_mode = as_a <scalar_mode> (TYPE_MODE (base_type));
-	  machine_mode vec_mode
-	    = aarch64_simd_container_mode (base_mode, vec_size);
-	  type = build_vector_type_for_mode (base_type, vec_mode);
+	  type = vectype;
 	}
 
       aarch64_sve::add_sve_type_attribute (type, num_zr, num_pr, NULL, NULL);
@@ -27223,6 +27242,22 @@  aarch64_can_tag_addresses ()
   return !TARGET_ILP32;
 }
 
+/* Implement TARGET_OMP_DEVICE_KIND_ARCH_ISA.  For the isa trait return 1 for
+   "simd", 2 for "sve" when TARGET_SVE, and 0 for any other name.  */
+int
+aarch64_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
+				  const char *name)
+{
+  if (trait != omp_device_isa)
+    return default_omp_device_kind_arch_isa (trait, name);
+  /* Compare the whole name, a mere "simd"/"sve" prefix must not match.  */
+  if (strcmp (name, "simd") == 0)
+    return 1;
+  if (strcmp (name, "sve") == 0 && TARGET_SVE)
+    return 2;
+  return 0;
+}
+
 /* Implement TARGET_ASM_FILE_END for AArch64.  This adds the AArch64 GNU NOTE
    section at the end if needed.  */
 #define GNU_PROPERTY_AARCH64_FEATURE_1_AND	0xc0000000
@@ -28146,6 +28181,9 @@  aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_MEMTAG_CAN_TAG_ADDRESSES
 #define TARGET_MEMTAG_CAN_TAG_ADDRESSES aarch64_can_tag_addresses
 
+#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
+#define TARGET_OMP_DEVICE_KIND_ARCH_ISA aarch64_omp_device_kind_arch_isa
+
 #if CHECKING_P
 #undef TARGET_RUN_TARGET_SELFTESTS
 #define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc
index 30c7470974d4b62ec6c03b2a7dd37f046983a247..1aa5f1a7898df9483a2af4f6f9fea99e6b219271 100644
--- a/gcc/cp/decl.cc
+++ b/gcc/cp/decl.cc
@@ -7998,6 +7998,18 @@  decl_maybe_constant_destruction (tree decl, tree type)
 
 static tree declare_simd_adjust_this (tree *, int *, void *);
 
+/* walk_tree callback: when *TP is a FUNCTION_DECL, store its DECL_ARGUMENTS
+   in the tree pointed to by DATA and do not walk into its subtrees.  */
+tree declare_variant_adjust_parm (tree *tp, int *walk_subtrees, void *data)
+{
+  if (TREE_CODE (*tp) != FUNCTION_DECL)
+    return NULL_TREE;
+
+  *(tree *) data = DECL_ARGUMENTS (*tp);
+  *walk_subtrees = 0;
+  return NULL_TREE;
+}
+
 /* Helper function of omp_declare_variant_finalize.  Finalize one
    "omp declare variant base" attribute.  Return true if it should be
    removed.  */
@@ -8015,13 +8027,14 @@  omp_declare_variant_finalize_one (tree decl, tree attr)
 
   tree ctx = TREE_VALUE (TREE_VALUE (attr));
   tree simd = omp_get_context_selector (ctx, "construct", "simd");
+  tree parm = NULL_TREE;
   if (simd)
     {
       TREE_VALUE (simd)
 	= c_omp_declare_simd_clauses_to_numbers (DECL_ARGUMENTS (decl),
 						 TREE_VALUE (simd));
-      /* FIXME, adjusting simd args unimplemented.  */
-      return true;
+      walk_tree (&TREE_PURPOSE (TREE_VALUE (attr)), declare_variant_adjust_parm,
+		 &parm, NULL);
     }
 
   tree chain = TREE_CHAIN (TREE_VALUE (attr));
@@ -8035,7 +8048,8 @@  omp_declare_variant_finalize_one (tree decl, tree attr)
   input_location = varid_loc;
 
   releasing_vec args;
-  tree parm = DECL_ARGUMENTS (decl);
+  if (!parm)
+    parm = DECL_ARGUMENTS (decl);
   if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE)
     parm = DECL_CHAIN (parm);
   for (; parm; parm = DECL_CHAIN (parm))
@@ -8096,7 +8110,9 @@  omp_declare_variant_finalize_one (tree decl, tree attr)
   if (variant)
     {
       const char *varname = IDENTIFIER_POINTER (DECL_NAME (variant));
-      if (!comptypes (TREE_TYPE (decl), TREE_TYPE (variant), 0))
+      /* TODO: Should we check that if (simd) the return vector type has an
+	 element type that is compatible to the declaration's return type.  */
+      if (!simd && !comptypes (TREE_TYPE (decl), TREE_TYPE (variant), 0))
 	{
 	  error_at (varid_loc, "variant %qD and base %qD have incompatible "
 			       "types", variant, decl);
diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc
index 4808608b7a1c06802ee231480c2003cf41c11799..9e7e1a15cb0c1ddf59e99c568d16c45fede5f8a8 100644
--- a/gcc/omp-simd-clone.cc
+++ b/gcc/omp-simd-clone.cc
@@ -299,39 +299,58 @@  simd_clone_vector_of_formal_parm_types (vec<tree> *args, tree fndecl)
     (*args)[i] = TREE_TYPE ((*args)[i]);
 }
 
+/* Scan the context selector of the 'omp declare variant' entry ATTR.  Store
+   TREE_PURPOSE (ATTR) in *FN_DECL and the value of a 'device' selector set,
+   if any, in *DEVICE_CLAUSES.  Return the value of the 'simd' selector of the
+   'construct' set; NULL_TREE if ATTR is null, if there is none, or if the
+   'construct' set contains a selector other than 'simd'.  */
+static tree
+find_variant_clauses (tree attr, tree *fn_decl, tree *device_clauses)
+{
+  if (!attr)
+    return NULL_TREE;
+
+  gcc_assert (TREE_CODE (attr) == TREE_LIST);
+  *fn_decl = TREE_PURPOSE (attr);
+
+  tree simd_clause = NULL_TREE;
+  for (tree set = TREE_VALUE (attr); set; set = TREE_CHAIN (set))
+    {
+      tree set_name = TREE_PURPOSE (set);
+      if (set_name == maybe_get_identifier ("construct"))
+	{
+	  tree sel = TREE_VALUE (set);
+	  for (; sel; sel = TREE_CHAIN (sel))
+	    {
+	      /* Give up on anything other than a 'simd' construct.  */
+	      if (TREE_PURPOSE (sel) != maybe_get_identifier ("simd"))
+		return NULL_TREE;
+	      simd_clause = TREE_VALUE (sel);
+	    }
+	}
+      else if (set_name == maybe_get_identifier ("device"))
+	*device_clauses = TREE_VALUE (set);
+    }
+
+  return simd_clause;
+}
+
 /* Given a simd function in NODE, extract the simd specific
    information from the OMP clauses passed in CLAUSES, and return
    the struct cgraph_simd_clone * if it should be cloned.  *INBRANCH_SPECIFIED
    is set to TRUE if the `inbranch' or `notinbranch' clause specified,
    otherwise set to FALSE.  */
 
-static struct cgraph_simd_clone *
-simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
+static bool
+simd_clone_clauses_extract (struct cgraph_node *node ATTRIBUTE_UNUSED,
+			    struct cgraph_simd_clone *clone_info,
+			    auto_vec<tree> &args, tree clauses,
 			    bool *inbranch_specified)
 {
-  auto_vec<tree> args;
-  simd_clone_vector_of_formal_parm_types (&args, node->decl);
-  tree t;
-  int n;
-  *inbranch_specified = false;
-
-  n = args.length ();
-  if (n > 0 && args.last () == void_type_node)
-    n--;
-
-  /* Allocate one more than needed just in case this is an in-branch
-     clone which will require a mask argument.  */
-  struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
-  clone_info->nargs = n;
-
-  if (!clauses)
-    goto out;
-
-  clauses = TREE_VALUE (clauses);
   if (!clauses || TREE_CODE (clauses) != OMP_CLAUSE)
-    goto out;
+    return true;
 
-  for (t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
+  for (tree t = clauses; t; t = OMP_CLAUSE_CHAIN (t))
     {
       switch (OMP_CLAUSE_CODE (t))
 	{
@@ -390,13 +409,13 @@  simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
 		  {
 		    warning_at (OMP_CLAUSE_LOCATION (t), 0,
 				"ignoring large linear step");
-		    return NULL;
+		    return false;
 		  }
 		else if (integer_zerop (step))
 		  {
 		    warning_at (OMP_CLAUSE_LOCATION (t), 0,
 				"ignoring zero linear step");
-		    return NULL;
+		    return false;
 		  }
 		else
 		  {
@@ -453,7 +472,76 @@  simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
 	}
     }
 
- out:
+  return true;
+}
+
+static struct cgraph_simd_clone *
+create_simd_clone_for_simd_or_variant (struct cgraph_node *node, tree attr,
+				       bool variant, bool *inbranch_specified)
+{
+  tree fn_decl = NULL_TREE;
+  tree device_clauses = NULL_TREE;
+  *inbranch_specified = false;
+
+  tree simd_clauses;
+  if (variant)
+    simd_clauses = find_variant_clauses (attr, &fn_decl, &device_clauses);
+  else
+    {
+      /* ATTR is currently pointing to 'omp declare simd', use TREE_VALUE to
+	 get the TREE_LIST with OMP_CLAUSE.  */
+      simd_clauses = TREE_VALUE (attr);
+      /* If SIMD_CLAUSES is not NULL_TREE, then it should be a TREE_LIST with
+	 OMP_CLAUSE inside.  */
+      if (simd_clauses)
+	simd_clauses = TREE_VALUE (simd_clauses);
+    }
+  auto_vec<tree> args;
+  simd_clone_vector_of_formal_parm_types (&args, node->decl);
+
+  int n = args.length ();
+  if (n > 0 && args.last () == void_type_node)
+    n--;
+
+  /* Allocate one more than needed just in case this is an in-branch
+     clone which will require a mask argument.  */
+  struct cgraph_simd_clone *clone_info = simd_clone_struct_alloc (n + 1);
+  clone_info->nargs = n;
+
+  if (!simd_clone_clauses_extract (node, clone_info, args,
+				   simd_clauses,
+				   inbranch_specified))
+    return NULL;
+
+  if (!clone_info)
+    return NULL;
+
+  clone_info->device = 0;
+  if (device_clauses)
+    {
+      while (device_clauses)
+	{
+	  tree identifier = TREE_PURPOSE (device_clauses);
+	  if (identifier == maybe_get_identifier ("isa"))
+	    {
+	      tree string_cst = TREE_VALUE (TREE_VALUE (device_clauses));
+	      const char * string_cst_p = TREE_STRING_POINTER (string_cst);
+	      clone_info->device
+		|= targetm.omp.device_kind_arch_isa (omp_device_isa,
+						    string_cst_p);
+	    }
+	  else if (identifier == maybe_get_identifier ("arch"))
+	    {
+	      tree string_cst = TREE_VALUE (TREE_VALUE (device_clauses));
+	      const char * string_cst_p = TREE_STRING_POINTER (string_cst);
+	      clone_info->device
+		|= targetm.omp.device_kind_arch_isa (omp_device_arch,
+						    string_cst_p);
+	    }
+	  device_clauses = TREE_CHAIN (device_clauses);
+	}
+    }
+
   if (TYPE_ATOMIC (TREE_TYPE (TREE_TYPE (node->decl))))
     {
       warning_at (DECL_SOURCE_LOCATION (node->decl), 0,
@@ -473,6 +561,11 @@  simd_clone_clauses_extract (struct cgraph_node *node, tree clauses,
 	return NULL;
       }
 
+  if (variant)
+    clone_info->variant_name = DECL_NAME (TREE_PURPOSE (attr));
+  else
+    clone_info->variant_name = NULL_TREE;
+
   return clone_info;
 }
 
@@ -531,6 +624,9 @@  static tree
 simd_clone_mangle (struct cgraph_node *node,
 		   struct cgraph_simd_clone *clone_info)
 {
+  if (clone_info->variant_name)
+    return clone_info->variant_name;
+
   char vecsize_mangle = clone_info->vecsize_mangle;
   char mask = clone_info->inbranch ? 'M' : 'N';
   poly_uint64 simdlen = clone_info->simdlen;
@@ -1911,21 +2007,48 @@  simd_clone_adjust (struct cgraph_node *node)
   pop_cfun ();
 }
 
+/* Return the 'omp declare simd' attribute found in ATTRS, if any.  Otherwise,
+   if the 'omp declare variant base' attribute found in ATTRS has a 'simd'
+   construct selector, set *VARIANT and return its value, else NULL_TREE.  */
+tree
+get_simd_or_variant_attrs (tree attrs, bool *variant)
+{
+  *variant = false;
+  tree attr = lookup_attribute ("omp declare simd", attrs);
+  if (attr)
+    return attr;
+
+  tree base = lookup_attribute ("omp declare variant base", attrs);
+  if (!base)
+    return NULL_TREE;
+  /* Skip the variant's function declaration, then go through 'construct'.  */
+  attr = lookup_attribute ("construct", TREE_VALUE (TREE_VALUE (base)));
+  if (!attr || !lookup_attribute ("simd", TREE_VALUE (attr)))
+    return NULL_TREE;
+  *variant = true;
+  /* BASE need not be the first attribute in ATTRS, so take its value rather
+     than TREE_VALUE (ATTRS).  */
+  return TREE_VALUE (base);
+}
+
 /* If the function in NODE is tagged as an elemental SIMD function,
    create the appropriate SIMD clones.  */
 
 void
 expand_simd_clones (struct cgraph_node *node)
 {
-  tree attr;
+  tree attr, attrs;
+  bool variant = false;
   bool explicit_p = true;
 
   if (node->inlined_to
       || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl)))
     return;
 
-  attr = lookup_attribute ("omp declare simd",
-			   DECL_ATTRIBUTES (node->decl));
+  attrs = DECL_ATTRIBUTES (node->decl);
+  attr = get_simd_or_variant_attrs (attrs, &variant);
+  if (variant)
+    explicit_p = false;
 
   /* See if we can add an "omp declare simd" directive implicitly
      before giving up.  */
@@ -1944,8 +2067,7 @@  expand_simd_clones (struct cgraph_node *node)
       && !oacc_get_fn_attrib (node->decl)
       && ok_for_auto_simd_clone (node))
     {
-      attr = tree_cons (get_identifier ("omp declare simd"), NULL,
-			DECL_ATTRIBUTES (node->decl));
+      attr = tree_cons (get_identifier ("omp declare simd"), NULL, attrs);
       DECL_ATTRIBUTES (node->decl) = attr;
       explicit_p = false;
     }
@@ -1970,8 +2092,9 @@  expand_simd_clones (struct cgraph_node *node)
       /* Start with parsing the "omp declare simd" attribute(s).  */
       bool inbranch_clause_specified;
       struct cgraph_simd_clone *clone_info
-	= simd_clone_clauses_extract (node, TREE_VALUE (attr),
-				      &inbranch_clause_specified);
+	= create_simd_clone_for_simd_or_variant (node, attr, variant,
+						 &inbranch_clause_specified);
+
       if (clone_info == NULL)
 	continue;
 
@@ -2070,7 +2193,8 @@  expand_simd_clones (struct cgraph_node *node)
 		     IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl)));
 	}
     }
-  while ((attr = lookup_attribute ("omp declare simd", TREE_CHAIN (attr))));
+  while ((attrs = TREE_CHAIN (attrs))
+	  && (attr = get_simd_or_variant_attrs (attrs, &variant)));
 }
 
 /* Entry point for IPA simd clone creation pass.  */
diff --git a/gcc/target.def b/gcc/target.def
index ffa12aa9023bb8f26a647a9848800c77f34afc67..ba14cc6da9dba5b6294c78c54b95b4622ea3139a 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1683,7 +1683,8 @@  DEFHOOK
 device trait set, return 0 if not present in any OpenMP context in the\n\
 whole translation unit, or -1 if not present in the current OpenMP context\n\
 but might be present in another OpenMP context in the same TU.",
-int, (enum omp_device_kind_arch_isa trait, const char *name), NULL)
+int, (enum omp_device_kind_arch_isa trait, const char *name),
+default_omp_device_kind_arch_isa)
 
 HOOK_VECTOR_END (omp)
 
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 860fb8ccbf1ab00c43dc4b4d32808c1f488406e4..2599d3fad0451e00df8fcf2ac1d6434d33fd9997 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -76,6 +76,8 @@  extern tree default_mangle_assembler_name (const char *);
 extern tree default_simd_clone_adjust_ret_or_param
   (struct cgraph_node *,tree , bool);
 
+extern int default_omp_device_kind_arch_isa
+  (omp_device_kind_arch_isa , const char *);
 
 extern machine_mode default_translate_mode_attribute (machine_mode);
 extern bool default_scalar_mode_supported_p (scalar_mode);
diff --git a/gcc/targhooks.cc b/gcc/targhooks.cc
index 4e54ceb0297828cf13e418dfa113651670a6f112..e6c65447fb7db4ff96108de98807a012d839bac8 100644
--- a/gcc/targhooks.cc
+++ b/gcc/targhooks.cc
@@ -408,6 +408,14 @@  default_simd_clone_adjust_ret_or_param (struct cgraph_node *node ATTRIBUTE_UNUSE
   return type;
 }
 
+/* The default implementation of TARGET_OMP_DEVICE_KIND_ARCH_ISA.  Return 1
+   only for the device kind named exactly "cpu", and 0 for everything else.  */
+int
+default_omp_device_kind_arch_isa (omp_device_kind_arch_isa trait, const char *name)
+{
+  return trait == omp_device_kind && strcmp (name, "cpu") == 0;
+}
+
 /* The default implementation of TARGET_TRANSLATE_MODE_ATTRIBUTE.  */
 
 machine_mode
diff --git a/gcc/testsuite/gcc.target/aarch64/declare-variant-1.c b/gcc/testsuite/gcc.target/aarch64/declare-variant-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..c44c9464f4e27047db9be5b0c9710ae3cfee8eee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/declare-variant-1.c
@@ -0,0 +1,7 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp" } */
+
+#include "declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVnN4v_callee" } } */
+/* { dg-final { scan-assembler "_ZGVnN8v_callee" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/declare-variant-1.x b/gcc/testsuite/gcc.target/aarch64/declare-variant-1.x
new file mode 100644
index 0000000000000000000000000000000000000000..61bcf8eff02e415a5044a7cbda8a593607fd0c56
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/declare-variant-1.x
@@ -0,0 +1,27 @@ 
+#if __ARM_FEATURE_SVE
+__SVInt16_t _ZGVsMxv_callee (__SVInt16_t, __SVBool_t);
+__SVInt16_t _ZGVsM8v_callee (__SVInt16_t, __SVBool_t);
+__SVInt16_t _ZGVsM16v_callee (__SVInt16_t, __SVBool_t);
+#endif
+__Int16x4_t _ZGVnN4v_callee (__Int16x4_t);
+__Int16x8_t _ZGVnN8v_callee (__Int16x8_t);
+#if __ARM_FEATURE_SVE
+#pragma omp declare variant(_ZGVsM16v_callee) \
+    match(construct = {simd(notinbranch, simdlen(16))}, device = {isa("sve")})
+#pragma omp declare variant(_ZGVsM8v_callee) \
+    match(construct = {simd(notinbranch, simdlen(8))}, device = {isa("sve")})
+#pragma omp declare variant(_ZGVsMxv_callee) \
+    match(construct = {simd(notinbranch)}, device = {isa("sve")})
+#endif
+#pragma omp declare variant(_ZGVnN4v_callee) \
+    match(construct = {simd(notinbranch, simdlen(4))}, device = {isa("simd")})
+#pragma omp declare variant(_ZGVnN8v_callee) \
+    match(construct = {simd(notinbranch, simdlen(8))}, device = {isa("simd")})
+extern short __attribute__ ((const)) callee (short);
+
+
+void caller_autovec (short * __restrict a, short *__restrict b, unsigned n)
+{
+  for (unsigned i = 0; i < n; ++i)
+    a[i] = callee (b[i]);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-1.c b/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..7a8129fe88ac9759b2337892a3d14f4e8196e61f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-1.c
@@ -0,0 +1,6 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp" } */
+
+#include "../declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVsMxv_callee" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-2.c b/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..2b6eabac76cf1cd059ec8d960ddd9e30973dc797
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-2.c
@@ -0,0 +1,6 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp -msve-vector-bits=128" } */
+
+#include "../declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVsM8v_callee" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-3.c b/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..e8b598fe479d7e1e92eb7f9e3413d5ac183626a9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/declare-variant-3.c
@@ -0,0 +1,6 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O3 -fopenmp -msve-vector-bits=256" } */
+
+#include "../declare-variant-1.x"
+
+/* { dg-final { scan-assembler "_ZGVsM16v_callee" } } */