
[6/8] aarch64: Add support for SME_F16F16

Message ID: mptbjyg4q5m.fsf@arm.com
State: New
Series: aarch64: Add support for new SVE and SME extensions

Commit Message

Richard Sandiford Nov. 15, 2024, 12:50 p.m. UTC
This patch adds support for the SME_F16F16 extension.  The extension
adds two new instructions to convert from a single vector of f16s
to two vectors of f32s.  It also adds f16 variants of existing SME
ZA instructions.
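
For illustration only (this sketch is not part of the patch): roughly how
the new ACLE surface is expected to be used.  The function names below are
made up; svcvt_f32_f16_x2, svcvtl_f32_f16_x2 and svmla_za16_f16_vg1x2 are
the intrinsics added by this series, and __arm_streaming/__arm_inout("za")
are the usual ACLE keyword attributes, since the new forms are
streaming-only.

#include <arm_sme.h>

#pragma GCC target "+sme-f16f16"

/* Widen one vector of f16s to two vectors of f32s (FCVT).  */
svfloat32x2_t
widen (svfloat16_t x) __arm_streaming
{
  return svcvt_f32_f16_x2 (x);
}

/* The FCVTL form of the same widening conversion.  */
svfloat32x2_t
widen_l (svfloat16_t x) __arm_streaming
{
  return svcvtl_f32_f16_x2 (x);
}

/* f16 variant of an existing SME ZA operation: FMLA into za.h.  */
void
acc (uint32_t slice, svfloat16x2_t a, svfloat16x2_t b)
  __arm_streaming __arm_inout("za")
{
  svmla_za16_f16_vg1x2 (slice, a, b);
}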

gcc/
	* config/aarch64/aarch64-option-extensions.def
	(sme-f16f16): New extension.
	* doc/invoke.texi: Document it.  Also document that sme-i16i64 and
	sme-f64f64 enable SME.
	* config/aarch64/aarch64.h (TARGET_STREAMING_SME_F16F16): New macro.
	* config/aarch64/aarch64-c.cc (aarch64_update_cpp_builtins):
	Conditionally define __ARM_FEATURE_SME_F16F16.
	* config/aarch64/aarch64-sve-builtins-sve2.def (svcvt, svcvtl): Add
	new SME_F16F16 intrinsics.
	* config/aarch64/aarch64-sve-builtins-sme.def: Add SME_F16F16 forms
	of existing intrinsics.
	* config/aarch64/aarch64-sve-builtins.cc (TYPES_h_float)
	(TYPES_cvt_f32_f16, TYPES_za_h_float): New type macros.
	* config/aarch64/aarch64-sve-builtins-base.cc
	(svcvt_impl::expand): Add sext_optab as another possibility.
	* config/aarch64/aarch64-sve-builtins-sve2.h (svcvtl): Declare.
	* config/aarch64/aarch64-sve-builtins-sve2.cc (svcvtl_impl): New class.
	(svcvtl): New function.
	* config/aarch64/iterators.md (VNx8SF_ONLY): New mode iterator.
	(SME_ZA_SDFx24): Replace with...
	(SME_ZA_HSDFx24): ...this.
	(SME_MOP_SDF): Replace with...
	(SME_MOP_HSDF): ...this.
	(SME_BINARY_SLICE_SDF): Replace with...
	(SME_BINARY_SLICE_HSDF): ...this.
	* config/aarch64/aarch64-sve2.md (extendvnx8hfvnx8sf2)
	(@aarch64_sve_cvtl<mode>): New patterns.
	* config/aarch64/aarch64-sme.md
	(@aarch64_sme_<SME_BINARY_SLICE_SDF:optab><mode>): Extend to...
	(@aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>): ...this.
	(*aarch64_sme_<SME_BINARY_SLICE_SDF:optab><mode>_plus): Extend to...
	(*aarch64_sme_<SME_BINARY_SLICE_HSDF:optab><mode>_plus): ...this.
	(@aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>): Extend to
	HF modes.
	(*aarch64_sme_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus)
	(@aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
	(*aarch64_sme_single_<SME_FP_TERNARY_SLICE:optab><mode><mode>_plus)
	(@aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
	(*aarch64_sme_lane_<SME_FP_TERNARY_SLICE:optab><mode><mode>)
	(@aarch64_sme_<SME_FP_MOP:optab><mode><mode>): Likewise.

gcc/testsuite/
	* lib/target-supports.exp: Test the assembler for sme-f16f16 support.
	* gcc.target/aarch64/pragma_cpp_predefs_4.c: Add tests for
	__ARM_FEATURE_SME_F16F16.  Also extend the existing SME tests.
	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
	(TEST_X2_WIDE): New macro.
	* gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c: New test.
	* gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c: Likewise.
	* gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c: Likewise.
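
As a further illustrative sketch (again not part of the patch), the
preprocessor behaviour that the pragma_cpp_predefs_4.c additions verify:
starting from a clean slate, +sme-f16f16 defines __ARM_FEATURE_SME_F16F16
and, because the extension depends on SME2, also pulls in
__ARM_FEATURE_SME2 and __ARM_FEATURE_SME.

/* Mirrors the new pragma_cpp_predefs_4.c checks.  */
#pragma GCC target "+nothing+sme-f16f16"
#ifndef __ARM_FEATURE_SME_F16F16
#error "+sme-f16f16 should define __ARM_FEATURE_SME_F16F16"
#endif
#ifndef __ARM_FEATURE_SME2
#error "+sme-f16f16 should imply +sme2"
#endif
#ifndef __ARM_FEATURE_SME
#error "+sme2 should imply +sme"
#endif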
---
 gcc/config/aarch64/aarch64-c.cc               |   2 +
 .../aarch64/aarch64-option-extensions.def     |   2 +
 gcc/config/aarch64/aarch64-sme.md             | 101 +++++-----
 .../aarch64/aarch64-sve-builtins-base.cc      |   5 +-
 .../aarch64/aarch64-sve-builtins-sme.def      |  15 ++
 .../aarch64/aarch64-sve-builtins-sve2.cc      |  11 ++
 .../aarch64/aarch64-sve-builtins-sve2.def     |   5 +
 .../aarch64/aarch64-sve-builtins-sve2.h       |   1 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    |  15 ++
 gcc/config/aarch64/aarch64-sve2.md            |  26 +++
 gcc/config/aarch64/aarch64.h                  |   4 +
 gcc/config/aarch64/iterators.md               |  15 +-
 gcc/doc/invoke.texi                           |   9 +-
 .../gcc.target/aarch64/pragma_cpp_predefs_4.c |  30 ++-
 .../sme2/acle-asm/add_za16_f16_vg1x2.c        | 126 ++++++++++++
 .../sme2/acle-asm/add_za16_f16_vg1x4.c        | 141 ++++++++++++++
 .../aarch64/sme2/acle-asm/cvt_f32_f16_x2.c    |  54 +++++
 .../aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c   |  54 +++++
 .../sme2/acle-asm/mla_lane_za16_f16_vg1x2.c   | 106 ++++++++++
 .../sme2/acle-asm/mla_lane_za16_f16_vg1x4.c   | 112 +++++++++++
 .../sme2/acle-asm/mla_za16_f16_vg1x2.c        | 184 ++++++++++++++++++
 .../sme2/acle-asm/mla_za16_f16_vg1x4.c        | 176 +++++++++++++++++
 .../sme2/acle-asm/mls_lane_za16_f16_vg1x2.c   | 106 ++++++++++
 .../sme2/acle-asm/mls_lane_za16_f16_vg1x4.c   | 112 +++++++++++
 .../sme2/acle-asm/mls_za16_f16_vg1x2.c        | 184 ++++++++++++++++++
 .../sme2/acle-asm/mls_za16_f16_vg1x4.c        | 176 +++++++++++++++++
 .../aarch64/sme2/acle-asm/mopa_za16_f16.c     |  34 ++++
 .../aarch64/sme2/acle-asm/mops_za16_f16.c     |  34 ++++
 .../sme2/acle-asm/sub_za16_f16_vg1x2.c        | 126 ++++++++++++
 .../sme2/acle-asm/sub_za16_f16_vg1x4.c        | 141 ++++++++++++++
 .../aarch64/sve/acle/asm/test_sve_acle.h      |  16 ++
 gcc/testsuite/lib/target-supports.exp         |   3 +-
 32 files changed, 2067 insertions(+), 59 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c

Patch

diff --git a/gcc/config/aarch64/aarch64-c.cc b/gcc/config/aarch64/aarch64-c.cc
index 0b59ceb49e3..cc4b7f96467 100644
--- a/gcc/config/aarch64/aarch64-c.cc
+++ b/gcc/config/aarch64/aarch64-c.cc
@@ -268,6 +268,8 @@  aarch64_update_cpp_builtins (cpp_reader *pfile)
 
   aarch64_def_or_undef (TARGET_SME, "__ARM_FEATURE_SME", pfile);
   aarch64_def_or_undef (TARGET_SME_I16I64, "__ARM_FEATURE_SME_I16I64", pfile);
+  aarch64_def_or_undef (AARCH64_HAVE_ISA (SME_F16F16),
+			"__ARM_FEATURE_SME_F16F16", pfile);
   aarch64_def_or_undef (TARGET_SME_F64F64, "__ARM_FEATURE_SME_F64F64", pfile);
   aarch64_def_or_undef (TARGET_SME2, "__ARM_FEATURE_SME2", pfile);
 
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index a5ab16233ba..0667359da76 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -225,6 +225,8 @@  AARCH64_FMV_FEATURE("sme-i16i64", SME_I64, (SME_I16I64))
 
 AARCH64_OPT_FMV_EXTENSION("sme2", SME2, (SME), (), (), "sme2")
 
+AARCH64_OPT_EXTENSION("sme-f16f16", SME_F16F16, (SME2), (), (), "")
+
 AARCH64_OPT_EXTENSION("mops", MOPS, (), (), (), "")
 
 AARCH64_OPT_EXTENSION("cssc", CSSC, (), (), (), "cssc")
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index e8a24e0b2f6..226fcbd7cef 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -926,6 +926,9 @@  (define_insn "@aarch64_sme_<optab><mode>"
 ;; -------------------------------------------------------------------------
 ;; Includes:
 ;; - ADD
+;; - FADD
+;; - FSUB
+;; - SUB
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode>"
@@ -954,26 +957,26 @@  (define_insn "*aarch64_sme_<optab><mode>_plus"
 )
 
 (define_insn "@aarch64_sme_<optab><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
-	  SME_BINARY_SLICE_SDF))]
+	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_HSDF))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1"
 )
 
 (define_insn "*aarch64_sme_<optab><mode>_plus"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
 		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
-	  SME_BINARY_SLICE_SDF))]
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	  SME_BINARY_SLICE_HSDF))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2"
 )
@@ -1634,70 +1637,70 @@  (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx24:mode>_plu
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
+	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2"
 )
 
 (define_insn "*aarch64_sme_<optab><mode><mode>_plus"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
 		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
-	   (match_operand:SME_ZA_SDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (match_operand:SME_ZA_HSDFx24 3 "aligned_register_operand" "Uw<vector_count>")]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3"
 )
 
 (define_insn "@aarch64_sme_single_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "register_operand" "w")
-	   (vec_duplicate:SME_ZA_SDFx24
-	     (match_operand:<VSINGLE> 2 "register_operand" "x"))]
+	   (match_operand:SME_ZA_HSDFx24 1 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HSDFx24
+	     (match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x"))]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, 0, vgx<vector_count>], %1, %2.<Vetype>"
 )
 
 (define_insn "*aarch64_sme_single_<optab><mode><mode>_plus"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
 		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "register_operand" "w")
-	   (vec_duplicate:SME_ZA_SDFx24
-	     (match_operand:<VSINGLE> 3 "register_operand" "x"))]
+	   (match_operand:SME_ZA_HSDFx24 2 "register_operand" "w")
+	   (vec_duplicate:SME_ZA_HSDFx24
+	     (match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x"))]
 	  SME_FP_TERNARY_SLICE))]
   "TARGET_STREAMING_SME2"
   "<optab>\tza.<Vetype>[%w0, %1, vgx<vector_count>], %2, %3.<Vetype>"
 )
 
 (define_insn "@aarch64_sme_lane_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:SI 0 "register_operand" "Uci")
-	   (match_operand:SME_ZA_SDFx24 1 "aligned_register_operand" "Uw<vector_count>")
-	   (unspec:SME_ZA_SDFx24
-	     [(match_operand:<VSINGLE> 2 "register_operand" "x")
+	   (match_operand:SME_ZA_HSDFx24 1 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_HSDFx24
+	     [(match_operand:<SME_ZA_HSDFx24:VSINGLE> 2 "register_operand" "x")
 	      (match_operand:SI 3 "const_int_operand")]
 	     UNSPEC_SVE_LANE_SELECT)]
 	  SME_FP_TERNARY_SLICE))]
@@ -1706,15 +1709,15 @@  (define_insn "@aarch64_sme_lane_<optab><mode><mode>"
 )
 
 (define_insn "*aarch64_sme_lane_<optab><mode><mode>"
-  [(set (reg:SME_ZA_SDFx24 ZA_REGNUM)
-	(unspec:SME_ZA_SDFx24
-	  [(reg:SME_ZA_SDFx24 ZA_REGNUM)
+  [(set (reg:SME_ZA_HSDFx24 ZA_REGNUM)
+	(unspec:SME_ZA_HSDFx24
+	  [(reg:SME_ZA_HSDFx24 ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (plus:SI (match_operand:SI 0 "register_operand" "Uci")
 		    (match_operand:SI 1 "const_0_to_7_operand"))
-	   (match_operand:SME_ZA_SDFx24 2 "aligned_register_operand" "Uw<vector_count>")
-	   (unspec:SME_ZA_SDFx24
-	     [(match_operand:<VSINGLE> 3 "register_operand" "x")
+	   (match_operand:SME_ZA_HSDFx24 2 "aligned_register_operand" "Uw<vector_count>")
+	   (unspec:SME_ZA_HSDFx24
+	     [(match_operand:<SME_ZA_HSDFx24:VSINGLE> 3 "register_operand" "x")
 	      (match_operand:SI 4 "const_int_operand")]
 	     UNSPEC_SVE_LANE_SELECT)]
 	  SME_FP_TERNARY_SLICE))]
@@ -1871,15 +1874,15 @@  (define_insn "*aarch64_sme_lane_<optab><VNx4SI_ONLY:mode><SME_ZA_HFx124:mode>"
 ;; -------------------------------------------------------------------------
 
 (define_insn "@aarch64_sme_<optab><mode><mode>"
-  [(set (reg:SME_MOP_SDF ZA_REGNUM)
-	(unspec:SME_MOP_SDF
-	  [(reg:SME_MOP_SDF ZA_REGNUM)
+  [(set (reg:SME_MOP_HSDF ZA_REGNUM)
+	(unspec:SME_MOP_HSDF
+	  [(reg:SME_MOP_HSDF ZA_REGNUM)
 	   (reg:DI SME_STATE_REGNUM)
 	   (match_operand:DI 0 "const_int_operand")
 	   (match_operand:<VPRED> 1 "register_operand" "Upl")
 	   (match_operand:<VPRED> 2 "register_operand" "Upl")
-	   (match_operand:SME_MOP_SDF 3 "register_operand" "w")
-	   (match_operand:SME_MOP_SDF 4 "register_operand" "w")]
+	   (match_operand:SME_MOP_HSDF 3 "register_operand" "w")
+	   (match_operand:SME_MOP_HSDF 4 "register_operand" "w")]
 	  SME_FP_MOP))]
   "TARGET_STREAMING"
   "<b><optab>\tza%0.<Vetype>, %1/m, %2/m, %3.<Vetype>, %4.<Vetype>"
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index 2117eceb606..9e1a4fa6890 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -684,8 +684,11 @@  public:
 	  optab = e.type_suffix (0).unsigned_p ? ufix_optab : sfix_optab;
 	else if (e.type_suffix (1).integer_p)
 	  optab = e.type_suffix (1).unsigned_p ? ufloat_optab : sfloat_optab;
-	else
+	else if (e.type_suffix (0).element_bits
+		 < e.type_suffix (1).element_bits)
 	  optab = trunc_optab;
+	else
+	  optab = sext_optab;
 	icode = convert_optab_handler (optab, mode0, mode1);
 	gcc_assert (icode != CODE_FOR_nothing);
 	return e.use_exact_insn (icode);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index bc2c3323636..c79245d9249 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -206,6 +206,21 @@  DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_d_float,
 DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_d_float, vg1x24, none)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16)
+DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_h_float, vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_h_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmla_lane, binary_za_slice_lane, za_h_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls, binary_za_slice_opt_single, za_h_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION_GS (svmls_lane, binary_za_slice_lane, za_h_float,
+			vg1x24, none)
+DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_h_float, za_m)
+DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_h_float, za_m)
+DEF_SME_ZA_FUNCTION_GS (svsub, unary_za_slice, za_h_float, vg1x24, none)
+#undef REQUIRED_EXTENSIONS
+
 #undef DEF_SME_ZA_FUNCTION
 #undef DEF_SME_ZA_FUNCTION_GS
 #undef DEF_SME_FUNCTION
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index fd0c98c6b68..b17b78dadd5 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -211,6 +211,16 @@  public:
   }
 };
 
+class svcvtl_impl : public function_base
+{
+public:
+  rtx
+  expand (function_expander &e) const override
+  {
+    return e.use_exact_insn (code_for_aarch64_sve_cvtl (e.result_mode ()));
+  }
+};
+
 class svcvtn_impl : public function_base
 {
 public:
@@ -908,6 +918,7 @@  FUNCTION (svcdot, svcdot_impl,)
 FUNCTION (svcdot_lane, svcdot_lane_impl,)
 FUNCTION (svclamp, svclamp_impl,)
 FUNCTION (svcvtlt, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTLT))
+FUNCTION (svcvtl, svcvtl_impl,)
 FUNCTION (svcvtn, svcvtn_impl,)
 FUNCTION (svcvtx, unspec_based_function, (-1, -1, UNSPEC_COND_FCVTX))
 FUNCTION (svcvtxnt, CODE_FOR_MODE1 (aarch64_sve2_cvtxnt),)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 39b5a59ae79..2189855d705 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -336,6 +336,11 @@  DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
 DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
 #undef REQUIRED_EXTENSIONS
 
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F16F16)
+DEF_SVE_FUNCTION_GS (svcvt, unary_convertxn, cvt_f32_f16, x2, none)
+DEF_SVE_FUNCTION_GS (svcvtl, unary_convertxn, cvt_f32_f16, x2, none)
+#undef REQUIRED_EXTENSIONS
+
 #define REQUIRED_EXTENSIONS \
   sve_and_sme (AARCH64_FL_SVE2 | AARCH64_FL_SVE_B16B16, \
 	       AARCH64_FL_SME2 | AARCH64_FL_SVE_B16B16)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index bb610cb792b..bfe3d170e70 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -61,6 +61,7 @@  namespace aarch64_sve
     extern const function_base *const svcdot_lane;
     extern const function_base *const svclamp;
     extern const function_base *const svcntp;
+    extern const function_base *const svcvtl;
     extern const function_base *const svcvtlt;
     extern const function_base *const svcvtn;
     extern const function_base *const svcvtx;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 75b35f6b546..b1d225ed046 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -296,6 +296,10 @@  CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_h_bfloat(S, D) \
   S (bf16)
 
+/* _f16.  */
+#define TYPES_h_float(S, D) \
+  S (f16)
+
 /* _s16
    _u16.  */
 #define TYPES_h_integer(S, D) \
@@ -450,6 +454,10 @@  CONSTEXPR const group_suffix_info group_suffixes[] = {
 #define TYPES_cvt_h_s_float(S, D) \
   D (bf16, f32), D (f16, f32)
 
+/* _f32_f16.  */
+#define TYPES_cvt_f32_f16(S, D) \
+  D (f32, f16)
+
 /* _f32_f16
    _f64_f32.  */
 #define TYPES_cvt_long(S, D) \
@@ -625,6 +633,10 @@  CONSTEXPR const group_suffix_info group_suffixes[] = {
   TYPES_za_bhsd_data (S, D), \
   TYPES_reinterpret1 (D, za128)
 
+/* _za16_f16.  */
+#define TYPES_za_h_float(S, D) \
+  D (za16, f16)
+
 /* _za32_s8.  */
 #define TYPES_za_s_b_signed(S, D) \
    D (za32, s8)
@@ -744,6 +756,7 @@  DEF_SVE_TYPES_ARRAY (bhs_data);
 DEF_SVE_TYPES_ARRAY (bhs_widen);
 DEF_SVE_TYPES_ARRAY (c);
 DEF_SVE_TYPES_ARRAY (h_bfloat);
+DEF_SVE_TYPES_ARRAY (h_float);
 DEF_SVE_TYPES_ARRAY (h_integer);
 DEF_SVE_TYPES_ARRAY (hs_signed);
 DEF_SVE_TYPES_ARRAY (hs_integer);
@@ -772,6 +785,7 @@  DEF_SVE_TYPES_ARRAY (d_data);
 DEF_SVE_TYPES_ARRAY (cvt);
 DEF_SVE_TYPES_ARRAY (cvt_bfloat);
 DEF_SVE_TYPES_ARRAY (cvt_h_s_float);
+DEF_SVE_TYPES_ARRAY (cvt_f32_f16);
 DEF_SVE_TYPES_ARRAY (cvt_long);
 DEF_SVE_TYPES_ARRAY (cvt_narrow_s);
 DEF_SVE_TYPES_ARRAY (cvt_narrow);
@@ -793,6 +807,7 @@  DEF_SVE_TYPES_ARRAY (all_za);
 DEF_SVE_TYPES_ARRAY (d_za);
 DEF_SVE_TYPES_ARRAY (za_bhsd_data);
 DEF_SVE_TYPES_ARRAY (za_all_data);
+DEF_SVE_TYPES_ARRAY (za_h_float);
 DEF_SVE_TYPES_ARRAY (za_s_b_signed);
 DEF_SVE_TYPES_ARRAY (za_s_b_unsigned);
 DEF_SVE_TYPES_ARRAY (za_s_b_integer);
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index a721a6889b1..95384797eab 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -97,6 +97,7 @@ 
 ;; == Conversions
 ;; ---- [FP<-FP] Widening conversions
 ;; ---- [FP<-FP] Narrowing conversions
+;; ---- [FP<-FP] Multi-vector widening conversions
 ;; ---- [FP<-FP] Multi-vector narrowing conversions
 ;; ---- [FP<-INT] Multi-vector conversions
 ;; ---- [INT<-FP] Multi-vector conversions
@@ -3115,6 +3116,31 @@  (define_insn "@aarch64_sve2_cvtxnt<mode>"
   "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
 )
 
+;; -------------------------------------------------------------------------
+;; ---- [FP<-FP] Multi-vector widening conversions
+;; -------------------------------------------------------------------------
+;; Includes the multi-register forms of:
+;; - FCVT
+;; - FCVTL
+;; -------------------------------------------------------------------------
+
+(define_insn "extendvnx8hfvnx8sf2"
+  [(set (match_operand:VNx8SF 0 "aligned_register_operand" "=Uw2")
+	(float_extend:VNx8SF
+	  (match_operand:VNx8HF 1 "register_operand" "w")))]
+  "TARGET_STREAMING_SME_F16F16"
+  "fcvt\t%0, %1.h"
+)
+
+(define_insn "@aarch64_sve_cvtl<mode>"
+  [(set (match_operand:VNx8SF_ONLY 0 "aligned_register_operand" "=Uw2")
+	(unspec:VNx8SF_ONLY
+	  [(match_operand:VNx8HF 1 "register_operand" "w")]
+	  UNSPEC_FCVTL))]
+  "TARGET_STREAMING_SME_F16F16"
+  "fcvtl\t%0, %1.h"
+)
+
 ;; -------------------------------------------------------------------------
 ;; ---- [FP<-FP] Multi-vector narrowing conversions
 ;; -------------------------------------------------------------------------
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index c22a6bb69d8..b5a6e2755d4 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -349,6 +349,10 @@  constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
 /* The FEAT_SME_I16I64 extension to SME, enabled through +sme-i16i64.  */
 #define TARGET_SME_I16I64 AARCH64_HAVE_ISA (SME_I16I64)
 
+/* The FEAT_SME_F16F16 extension to SME, enabled through +sme-f16f16.  */
+#define TARGET_STREAMING_SME_F16F16 \
+  (AARCH64_HAVE_ISA (SME_F16F16) && TARGET_STREAMING)
+
 /* The FEAT_SME_F64F64 extension to SME, enabled through +sme-f64f64.  */
 #define TARGET_SME_F64F64 AARCH64_HAVE_ISA (SME_F64F64)
 
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 3852f0d42fb..f0ca3f4b892 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -432,6 +432,7 @@  (define_mode_iterator VNx16SI_ONLY [VNx16SI])
 (define_mode_iterator VNx8HI_ONLY [VNx8HI])
 (define_mode_iterator VNx8BF_ONLY [VNx8BF])
 (define_mode_iterator VNx8SI_ONLY [VNx8SI])
+(define_mode_iterator VNx8SF_ONLY [VNx8SF])
 (define_mode_iterator VNx8DI_ONLY [VNx8DI])
 (define_mode_iterator VNx4SI_ONLY [VNx4SI])
 (define_mode_iterator VNx4SF_ONLY [VNx4SF])
@@ -681,12 +682,17 @@  (define_mode_iterator SME_ZA_HIx24 [VNx16HI VNx32HI])
 (define_mode_iterator SME_ZA_SDIx24 [VNx8SI (VNx4DI "TARGET_SME_I16I64")
 				     VNx16SI (VNx8DI "TARGET_SME_I16I64")])
 
-(define_mode_iterator SME_ZA_SDFx24 [VNx8SF (VNx4DF "TARGET_SME_F64F64")
-				     VNx16SF (VNx8DF "TARGET_SME_F64F64")])
+(define_mode_iterator SME_ZA_HSDFx24 [VNx8SF VNx16SF
+				      (VNx4DF "TARGET_SME_F64F64")
+				      (VNx8DF "TARGET_SME_F64F64")
+				      (VNx16HF "TARGET_STREAMING_SME_F16F16")
+				      (VNx32HF "TARGET_STREAMING_SME_F16F16")])
 
 ;; The modes for which outer product instructions are supported.
 (define_mode_iterator SME_MOP_BHI [VNx16QI (VNx8HI "TARGET_SME_I16I64")])
-(define_mode_iterator SME_MOP_SDF [VNx4SF (VNx2DF "TARGET_SME_F64F64")])
+(define_mode_iterator SME_MOP_HSDF [VNx4SF
+				    (VNx2DF "TARGET_SME_F64F64")
+				    (VNx8HF "TARGET_STREAMING_SME_F16F16")])
 
 ;; ------------------------------------------------------------------
 ;; Unspec enumerations for Advance SIMD. These could well go into
@@ -1093,6 +1099,7 @@  (define_c_enum "unspec"
     UNSPEC_FMAXNMQV
     UNSPEC_FMINQV
     UNSPEC_FMINNMQV
+    UNSPEC_FCVTL
     UNSPEC_FCVTN
     UNSPEC_FDOT
     UNSPEC_LD1_EXTENDQ
@@ -3681,7 +3688,7 @@  (define_int_iterator SME2_BMOP [UNSPEC_SME_BMOPA UNSPEC_SME_BMOPS])
 
 (define_int_iterator SME_BINARY_SLICE_SDI [UNSPEC_SME_ADD UNSPEC_SME_SUB])
 
-(define_int_iterator SME_BINARY_SLICE_SDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB])
+(define_int_iterator SME_BINARY_SLICE_HSDF [UNSPEC_SME_FADD UNSPEC_SME_FSUB])
 
 (define_int_iterator SME_BINARY_WRITE_SLICE_SDI [UNSPEC_SME_ADD_WRITE
 						 UNSPEC_SME_SUB_WRITE])
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1e50edb53ac..b9db38ac873 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -21788,11 +21788,16 @@  Enable the Common Short Sequence Compression instructions.
 @item sme
 Enable the Scalable Matrix Extension.
 @item sme-i16i64
-Enable the FEAT_SME_I16I64 extension to SME.
+Enable the FEAT_SME_I16I64 extension to SME.  This also enables SME
+instructions.
 @item sme-f64f64
-Enable the FEAT_SME_F64F64 extension to SME.
+Enable the FEAT_SME_F64F64 extension to SME.  This also enables SME
+instructions.
 @item sme2
 Enable the Scalable Matrix Extension 2.  This also enables SME instructions.
+@item sme-f16f16
+Enable the FEAT_SME_F16F16 extension to SME.  This also enables SME2
+instructions.
 @item lse128
 Enable the LSE128 128-bit atomic instructions extension.  This also
 enables LSE instructions.
diff --git a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
index 9dd346faf96..6d82458be8e 100644
--- a/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/pragma_cpp_predefs_4.c
@@ -70,19 +70,45 @@ 
 #ifdef __ARM_FEATURE_SME_I16I64
 #error Foo
 #endif
+#ifdef __ARM_FEATURE_SME_F16F16
+#error Foo
+#endif
 #ifdef __ARM_FEATURE_SME_F64F64
 #error Foo
 #endif
 
-#pragma GCC target "+sme-i16i64"
+#pragma GCC target "+nothing+sme-i16i64"
 #ifndef __ARM_FEATURE_SME_I16I64
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_SME2
+#error Foo
+#endif
+
+#pragma GCC target "+nothing+sme-f16f16"
+#ifndef __ARM_FEATURE_SME_F16F16
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifndef __ARM_FEATURE_SME2
+#error Foo
+#endif
 
-#pragma GCC target "+sme-f64f64"
+#pragma GCC target "+nothing+sme-f64f64"
 #ifndef __ARM_FEATURE_SME_F64F64
 #error Foo
 #endif
+#ifndef __ARM_FEATURE_SME
+#error Foo
+#endif
+#ifdef __ARM_FEATURE_SME2
+#error Foo
+#endif
 
 #pragma GCC target "+nothing+sve-b16b16"
 #ifdef __ARM_FEATURE_SVE_B16B16
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c
new file mode 100644
index 00000000000..80e3de6b543
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x2.c
@@ -0,0 +1,126 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** add_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (0, z0),
+	    svadd_za16_vg1x2 (0, z0))
+
+/*
+** add_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w0, z0),
+	    svadd_za16_vg1x2 (w0, z0))
+
+/*
+** add_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	fadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w7, z0),
+	    svadd_za16_vg1x2 (w7, z0))
+
+/*
+** add_w8_z0:
+**	fadd	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8, z0),
+	    svadd_za16_vg1x2 (w8, z0))
+
+/*
+** add_w11_z0:
+**	fadd	za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w11, z0),
+	    svadd_za16_vg1x2 (w11, z0))
+
+
+/*
+** add_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	fadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w12, z0),
+	    svadd_za16_vg1x2 (w12, z0))
+
+/*
+** add_w8p7_z0:
+**	fadd	za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8 + 7, z0),
+	    svadd_za16_vg1x2 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8 + 8, z0),
+	    svadd_za16_vg1x2 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fadd	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8 - 1, z0),
+	    svadd_za16_vg1x2 (w8 - 1, z0))
+
+/*
+** add_w8_z18:
+**	fadd	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8, z18),
+	    svadd_za16_vg1x2 (w8, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** add_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fadd	za\.h\[w8, 0, vgx2\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8, z23),
+	    svadd_za16_vg1x2 (w8, z23))
+
+/*
+** add_w8_z28:
+**	fadd	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat16x2_t,
+	    svadd_za16_f16_vg1x2 (w8, z28),
+	    svadd_za16_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c
new file mode 100644
index 00000000000..21ca0114948
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/add_za16_f16_vg1x4.c
@@ -0,0 +1,141 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** add_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fadd	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_0_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (0, z0),
+	    svadd_za16_vg1x4 (0, z0))
+
+/*
+** add_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fadd	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w0_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w0, z0),
+	    svadd_za16_vg1x4 (w0, z0))
+
+/*
+** add_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	fadd	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w7_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w7, z0),
+	    svadd_za16_vg1x4 (w7, z0))
+
+/*
+** add_w8_z0:
+**	fadd	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8, z0),
+	    svadd_za16_vg1x4 (w8, z0))
+
+/*
+** add_w11_z0:
+**	fadd	za\.h\[w11, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w11_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w11, z0),
+	    svadd_za16_vg1x4 (w11, z0))
+
+
+/*
+** add_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	fadd	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w12_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w12, z0),
+	    svadd_za16_vg1x4 (w12, z0))
+
+/*
+** add_w8p7_z0:
+**	fadd	za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8p7_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8 + 7, z0),
+	    svadd_za16_vg1x4 (w8 + 7, z0))
+
+/*
+** add_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fadd	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8p8_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8 + 8, z0),
+	    svadd_za16_vg1x4 (w8 + 8, z0))
+
+/*
+** add_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fadd	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8m1_z0, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8 - 1, z0),
+	    svadd_za16_vg1x4 (w8 - 1, z0))
+
+/*
+** add_w8_z4:
+**	fadd	za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z4, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8, z4),
+	    svadd_za16_vg1x4 (w8, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** add_w8_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fadd	za\.h\[w8, 0, vgx4\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (add_w8_z18, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8, z18),
+	    svadd_za16_vg1x4 (w8, z18))
+
+/*
+** add_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fadd	za\.h\[w8, 0, vgx4\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (add_w8_z23, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8, z23),
+	    svadd_za16_vg1x4 (w8, z23))
+
+/*
+** add_w8_z28:
+**	fadd	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}
+**	ret
+*/
+TEST_ZA_XN (add_w8_z28, svfloat16x4_t,
+	    svadd_za16_f16_vg1x4 (w8, z28),
+	    svadd_za16_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c
new file mode 100644
index 00000000000..8d610042367
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvt_f32_f16_x2.c
@@ -0,0 +1,54 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** cvt_z0_z0:
+**	fcvt	{z0\.s - z1\.s}, z0\.h
+**	ret
+*/
+TEST_X2_WIDE (cvt_z0_z0, svfloat32x2_t, svfloat16_t,
+	      z0_res = svcvt_f32_f16_x2 (z0),
+	      z0_res = svcvt_f32 (z0))
+
+/*
+** cvt_z0_z6:
+**	fcvt	{z6\.s - z7\.s}, z0\.h
+**	ret
+*/
+TEST_X2_WIDE (cvt_z0_z6, svfloat32x2_t, svfloat16_t,
+	      z6 = svcvt_f32_f16_x2 (z0),
+	      z6 = svcvt_f32 (z0))
+
+/*
+** cvt_z0_z29:
+**	fcvt	[^\n]+, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_X2_WIDE (cvt_z0_z29, svfloat32x2_t, svfloat16_t,
+	      z29 = svcvt_f32_f16_x2 (z0),
+	      z29 = svcvt_f32 (z0))
+
+/*
+** cvt_z5_z0:
+**	fcvt	{z0\.s - z1\.s}, z5\.h
+**	ret
+*/
+TEST_X2_WIDE (cvt_z5_z0, svfloat32x2_t, svfloat16_t,
+	      z0_res = svcvt_f32_f16_x2 (z5),
+	      z0_res = svcvt_f32 (z5))
+
+/*
+** cvt_z22_z16:
+**	fcvt	{z16\.s - z17\.s}, z22\.h
+**	ret
+*/
+TEST_X2_WIDE (cvt_z22_z16, svfloat32x2_t, svfloat16_t,
+	      z16 = svcvt_f32_f16_x2 (z22),
+	      z16 = svcvt_f32 (z22))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c
new file mode 100644
index 00000000000..42e89b531f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/cvtl_f32_f16_x2.c
@@ -0,0 +1,54 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** cvtl_z0_z0:
+**	fcvtl	{z0\.s - z1\.s}, z0\.h
+**	ret
+*/
+TEST_X2_WIDE (cvtl_z0_z0, svfloat32x2_t, svfloat16_t,
+	      z0_res = svcvtl_f32_f16_x2 (z0),
+	      z0_res = svcvtl_f32 (z0))
+
+/*
+** cvtl_z0_z6:
+**	fcvtl	{z6\.s - z7\.s}, z0\.h
+**	ret
+*/
+TEST_X2_WIDE (cvtl_z0_z6, svfloat32x2_t, svfloat16_t,
+	      z6 = svcvtl_f32_f16_x2 (z0),
+	      z6 = svcvtl_f32 (z0))
+
+/*
+** cvtl_z0_z29:
+**	fcvtl	[^\n]+, z0\.h
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	ret
+*/
+TEST_X2_WIDE (cvtl_z0_z29, svfloat32x2_t, svfloat16_t,
+	      z29 = svcvtl_f32_f16_x2 (z0),
+	      z29 = svcvtl_f32 (z0))
+
+/*
+** cvtl_z5_z0:
+**	fcvtl	{z0\.s - z1\.s}, z5\.h
+**	ret
+*/
+TEST_X2_WIDE (cvtl_z5_z0, svfloat32x2_t, svfloat16_t,
+	      z0_res = svcvtl_f32_f16_x2 (z5),
+	      z0_res = svcvtl_f32 (z5))
+
+/*
+** cvtl_z22_z16:
+**	fcvtl	{z16\.s - z17\.s}, z22\.h
+**	ret
+*/
+TEST_X2_WIDE (cvtl_z22_z16, svfloat32x2_t, svfloat16_t,
+	      z16 = svcvtl_f32_f16_x2 (z22),
+	      z16 = svcvtl_f32 (z22))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c
new file mode 100644
index 00000000000..ecb3fc2381c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x2.c
@@ -0,0 +1,106 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mla_lane_0_z0_z4_0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmla	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (0, z0, z4, 0),
+	      svmla_lane_za16_vg1x2 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+**	mov	(w8|w9|w10|w11), w0
+**	fmla	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w0, z0, z7, 1),
+	      svmla_lane_za16_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+**	fmla	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w8, z28, z4, 2),
+	      svmla_lane_za16_vg1x2 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+**	fmla	za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w8 + 7, z0, z4, 3),
+	      svmla_lane_za16_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmla	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w8 + 8, z0, z4, 4),
+	      svmla_lane_za16_vg1x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmla	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w0 - 1, z0, z4, 5),
+	      svmla_lane_za16_vg1x2 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+**	str	d15, \[sp, #?-16\]!
+**	fmla	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t,
+		  svmla_lane_za16_f16_vg1x2 (w8, z4, z15, 6),
+		  svmla_lane_za16_vg1x2 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+**	mov	(z[0-7]).d, z16.d
+**	fmla	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w8, z28, z16, 7),
+	      svmla_lane_za16_vg1x2 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fmla	za\.h\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w8, z17, z7, 0),
+	      svmla_lane_za16_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+**	fmla	za\.h\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x2 (w8, z22, z4, 1),
+	      svmla_lane_za16_vg1x2 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c
new file mode 100644
index 00000000000..8ff575d598d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_lane_za16_f16_vg1x4.c
@@ -0,0 +1,112 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mla_lane_0_z0_z4_0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmla	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (0, z0, z4, 0),
+	      svmla_lane_za16_vg1x4 (0, z0, z4, 0))
+
+/*
+** mla_lane_w0_z0_z7_1:
+**	mov	(w8|w9|w10|w11), w0
+**	fmla	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w0, z0, z7, 1),
+	      svmla_lane_za16_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mla_lane_w8_z28_z4_2:
+**	fmla	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w8, z28, z4, 2),
+	      svmla_lane_za16_vg1x4 (w8, z28, z4, 2))
+
+/*
+** mla_lane_w8p7_z0_z4_3:
+**	fmla	za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w8 + 7, z0, z4, 3),
+	      svmla_lane_za16_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mla_lane_w8p8_z0_z4_4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmla	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w8 + 8, z0, z4, 4),
+	      svmla_lane_za16_vg1x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mla_lane_w0m1_z0_z4_5:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmla	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w0 - 1, z0, z4, 5),
+	      svmla_lane_za16_vg1x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mla_lane_w8_z4_z15_6:
+**	str	d15, \[sp, #?-16\]!
+**	fmla	za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_LANE_Z15 (mla_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t,
+		  svmla_lane_za16_f16_vg1x4 (w8, z4, z15, 6),
+		  svmla_lane_za16_vg1x4 (w8, z4, z15, 6))
+
+/*
+** mla_lane_w8_z28_z16_7:
+**	mov	(z[0-7]).d, z16.d
+**	fmla	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w8, z28, z16, 7),
+	      svmla_lane_za16_vg1x4 (w8, z28, z16, 7))
+
+/*
+** mla_lane_w8_z17_z7_0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fmla	za\.h\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w8, z17, z7, 0),
+	      svmla_lane_za16_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mla_lane_w8_z22_z4_1:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fmla	za\.h\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mla_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t,
+	      svmla_lane_za16_f16_vg1x4 (w8, z22, z4, 1),
+	      svmla_lane_za16_vg1x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c
new file mode 100644
index 00000000000..f4a723aba2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x2.c
@@ -0,0 +1,184 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mla_0_z0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmla	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (0, z0, z0),
+	    svmla_za16_vg1x2 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmla	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w0, z0, z0),
+	    svmla_za16_vg1x2 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+**	fmla	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8, z0, z4),
+	    svmla_za16_vg1x2 (w8, z0, z4))
+
+/*
+** mla_w8_z4_z18:
+**	fmla	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z4_z18, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8, z4, z18),
+	    svmla_za16_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** mla_w8_z23_z0:
+**	...
+**	fmla	za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z23_z0, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8, z23, z0),
+	    svmla_za16_vg1x2 (w8, z23, z0))
+
+/*
+** mla_w8_z18_z23:
+**	...
+**	fmla	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z18_z23, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8, z18, z23),
+	    svmla_za16_vg1x2 (w8, z18, z23))
+
+/*
+** mla_w8_z4_z28:
+**	fmla	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z4_z28, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8, z4, z28),
+	    svmla_za16_vg1x2 (w8, z4, z28))
+
+/*
+** mla_w8p7_z4_z0:
+**	fmla	za\.h\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8 + 7, z4, z0),
+	    svmla_za16_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmla	za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8 + 8, z4, z4),
+	    svmla_za16_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fmla	za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x2_t,
+	    svmla_za16_f16_vg1x2 (w8 - 1, z4, z0),
+	    svmla_za16_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmla	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (0, z1, z0),
+	        svmla_za16_vg1x2 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmla	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (w0, z1, z0),
+	        svmla_za16_vg1x2 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+**	fmla	za\.h\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (w8, z1, z0),
+	        svmla_za16_vg1x2 (w8, z1, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+**	fmla	za\.h\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (w8 + 7, z1, z0),
+	        svmla_za16_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmla	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (w8 + 8, z1, z0),
+	        svmla_za16_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmla	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (w0 - 1, z1, z0),
+	        svmla_za16_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+**	str	d15, \[sp, #?-16\]!
+**	fmla	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x2_t, svfloat16_t,
+		    svmla_single_za16_f16_vg1x2 (w8, z0, z15),
+		    svmla_za16_vg1x2 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+**	mov	(z[0-7]).d, z16.d
+**	fmla	za\.h\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x2_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x2 (w8, z20, z16),
+	        svmla_za16_vg1x2 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c
new file mode 100644
index 00000000000..420be0dd50f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mla_za16_f16_vg1x4.c
@@ -0,0 +1,176 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mla_0_z0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmla	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_0_z0_z0, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (0, z0, z0),
+	    svmla_za16_vg1x4 (0, z0, z0))
+
+/*
+** mla_w0_z0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmla	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w0_z0_z0, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w0, z0, z0),
+	    svmla_za16_vg1x4 (w0, z0, z0))
+
+/*
+** mla_w8_z0_z4:
+**	fmla	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z0_z4, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8, z0, z4),
+	    svmla_za16_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check for correctness for misaligned registers.  */
+
+/*
+** mla_w8_z0_z18:
+**	...
+**	fmla	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z0_z18, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8, z0, z18),
+	    svmla_za16_vg1x4 (w8, z0, z18))
+
+/*
+** mla_w8_z18_z28:
+**	...
+**	fmla	za\.h\[w8, 0, vgx4\], [^\n]+, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z18_z28, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8, z18, z28),
+	    svmla_za16_vg1x4 (w8, z18, z28))
+
+/*
+** mla_w8_z28_z23:
+**	...
+**	fmla	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mla_w8_z28_z23, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8, z28, z23),
+	    svmla_za16_vg1x4 (w8, z28, z23))
+
+/*
+** mla_w8p7_z4_z0:
+**	fmla	za\.h\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8p7_z4_z0, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8 + 7, z4, z0),
+	    svmla_za16_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mla_w8p8_z4_z4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmla	za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8p8_z4_z4, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8 + 8, z4, z4),
+	    svmla_za16_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mla_w8m1_z4_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fmla	za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mla_w8m1_z4_z0, svfloat16x4_t,
+	    svmla_za16_f16_vg1x4 (w8 - 1, z4, z0),
+	    svmla_za16_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mla_single_0_z1_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmla	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_0_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (0, z1, z0),
+	        svmla_za16_vg1x4 (0, z1, z0))
+
+/*
+** mla_single_w0_z1_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmla	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w0_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (w0, z1, z0),
+	        svmla_za16_vg1x4 (w0, z1, z0))
+
+/*
+** mla_single_w8_z1_z0:
+**	fmla	za\.h\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (w8, z1, z0),
+	        svmla_za16_vg1x4 (w8, z1, z0))
+
+/*
+** mla_single_w8p7_z1_z0:
+**	fmla	za\.h\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (w8 + 7, z1, z0),
+	        svmla_za16_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mla_single_w8p8_z1_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmla	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (w8 + 8, z1, z0),
+	        svmla_za16_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mla_single_w0m1_z1_z0:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmla	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (w0 - 1, z1, z0),
+	        svmla_za16_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mla_single_w8_z0_z15:
+**	str	d15, \[sp, #?-16\]!
+**	fmla	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_SINGLE_Z15 (mla_single_w8_z0_z15, svfloat16x4_t, svfloat16_t,
+		    svmla_single_za16_f16_vg1x4 (w8, z0, z15),
+		    svmla_za16_vg1x4 (w8, z0, z15))
+
+/*
+** mla_single_w8_z20_z16:
+**	mov	(z[0-7]).d, z16.d
+**	fmla	za\.h\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mla_single_w8_z20_z16, svfloat16x4_t, svfloat16_t,
+	        svmla_single_za16_f16_vg1x4 (w8, z20, z16),
+	        svmla_za16_vg1x4 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c
new file mode 100644
index 00000000000..9d796cab066
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x2.c
@@ -0,0 +1,106 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
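+
+/* Where the assembler accepts +sme-f16f16 the test is assembled as well
+   (dg-do assemble); elsewhere it is compile-only, with the expected
+   function bodies below still checked.  The pragma enables the
+   SME_F16F16 intrinsics without requiring a global -march option.  */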
+
+/*
+** mls_lane_0_z0_z4_0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (0, z0, z4, 0),
+	      svmls_lane_za16_vg1x2 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z7\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w0, z0, z7, 1),
+	      svmls_lane_za16_vg1x2 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+**	fmls	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, z4\.h\[2\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w8, z28, z4, 2),
+	      svmls_lane_za16_vg1x2 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+**	fmls	za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}, z4\.h\[3\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w8 + 7, z0, z4, 3),
+	      svmls_lane_za16_vg1x2 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[4\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w8 + 8, z0, z4, 4),
+	      svmls_lane_za16_vg1x2 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, z4\.h\[5\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w0 - 1, z0, z4, 5),
+	      svmls_lane_za16_vg1x2 (w0 - 1, z0, z4, 5))
+
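+/* Indexed forms encode the vector operand in four bits, so z15 is the
+   highest register they accept.  The low 64 bits of z8-z15 are
+   callee-saved, hence d15 is saved and restored around its use.  */
+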
+/*
+** mls_lane_w8_z4_z15_6:
+**	str	d15, \[sp, #?-16\]!
+**	fmls	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, z15\.h\[6\]
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x2_t, svfloat16_t,
+		  svmls_lane_za16_f16_vg1x2 (w8, z4, z15, 6),
+		  svmls_lane_za16_vg1x2 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+**	mov	(z[0-7]).d, z16.d
+**	fmls	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}, \1\.h\[7\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w8, z28, z16, 7),
+	      svmls_lane_za16_vg1x2 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fmls	za\.h\[w8, 0, vgx2\], [^\n]+, z7\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w8, z17, z7, 0),
+	      svmls_lane_za16_vg1x2 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+**	fmls	za\.h\[w8, 0, vgx2\], {z22\.h - z23\.h}, z4\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x2_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x2 (w8, z22, z4, 1),
+	      svmls_lane_za16_vg1x2 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c
new file mode 100644
index 00000000000..e5f5828836f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_lane_za16_f16_vg1x4.c
@@ -0,0 +1,112 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mls_lane_0_z0_z4_0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_0_z0_z4_0, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (0, z0, z4, 0),
+	      svmls_lane_za16_vg1x4 (0, z0, z4, 0))
+
+/*
+** mls_lane_w0_z0_z7_1:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z7\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w0_z0_z7_1, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w0, z0, z7, 1),
+	      svmls_lane_za16_vg1x4 (w0, z0, z7, 1))
+
+/*
+** mls_lane_w8_z28_z4_2:
+**	fmls	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, z4\.h\[2\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z4_2, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w8, z28, z4, 2),
+	      svmls_lane_za16_vg1x4 (w8, z28, z4, 2))
+
+/*
+** mls_lane_w8p7_z0_z4_3:
+**	fmls	za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}, z4\.h\[3\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8p7_z0_z4_3, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w8 + 7, z0, z4, 3),
+	      svmls_lane_za16_vg1x4 (w8 + 7, z0, z4, 3))
+
+/*
+** mls_lane_w8p8_z0_z4_4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[4\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8p8_z0_z4_4, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w8 + 8, z0, z4, 4),
+	      svmls_lane_za16_vg1x4 (w8 + 8, z0, z4, 4))
+
+/*
+** mls_lane_w0m1_z0_z4_5:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, z4\.h\[5\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w0m1_z0_z4_5, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w0 - 1, z0, z4, 5),
+	      svmls_lane_za16_vg1x4 (w0 - 1, z0, z4, 5))
+
+/*
+** mls_lane_w8_z4_z15_6:
+**	str	d15, \[sp, #?-16\]!
+**	fmls	za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}, z15\.h\[6\]
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_LANE_Z15 (mls_lane_w8_z4_z15_6, svfloat16x4_t, svfloat16_t,
+		  svmls_lane_za16_f16_vg1x4 (w8, z4, z15, 6),
+		  svmls_lane_za16_vg1x4 (w8, z4, z15, 6))
+
+/*
+** mls_lane_w8_z28_z16_7:
+**	mov	(z[0-7]).d, z16.d
+**	fmls	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, \1\.h\[7\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z28_z16_7, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w8, z28, z16, 7),
+	      svmls_lane_za16_vg1x4 (w8, z28, z16, 7))
+
+/*
+** mls_lane_w8_z17_z7_0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fmls	za\.h\[w8, 0, vgx4\], [^\n]+, z7\.h\[0\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z17_z7_0, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w8, z17, z7, 0),
+	      svmls_lane_za16_vg1x4 (w8, z17, z7, 0))
+
+/*
+** mls_lane_w8_z22_z4_1:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fmls	za\.h\[w8, 0, vgx4\], [^\n]+, z4\.h\[1\]
+**	ret
+*/
+TEST_ZA_LANE (mls_lane_w8_z22_z4_1, svfloat16x4_t, svfloat16_t,
+	      svmls_lane_za16_f16_vg1x4 (w8, z22, z4, 1),
+	      svmls_lane_za16_vg1x4 (w8, z22, z4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c
new file mode 100644
index 00000000000..a333bee67b8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x2.c
@@ -0,0 +1,184 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mls_0_z0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (0, z0, z0),
+	    svmls_za16_vg1x2 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w0, z0, z0),
+	    svmls_za16_vg1x2 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+**	fmls	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8, z0, z4),
+	    svmls_za16_vg1x2 (w8, z0, z4))
+
+/*
+** mls_w8_z4_z18:
+**	fmls	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z4_z18, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8, z4, z18),
+	    svmls_za16_vg1x2 (w8, z4, z18))
+
+/* Leave the assembler to check correctness for misaligned registers.  */
+
+/*
+** mls_w8_z23_z0:
+**	...
+**	fmls	za\.h\[w8, 0, vgx2\], [^\n]+, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z23_z0, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8, z23, z0),
+	    svmls_za16_vg1x2 (w8, z23, z0))
+
+/*
+** mls_w8_z18_z23:
+**	...
+**	fmls	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z18_z23, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8, z18, z23),
+	    svmls_za16_vg1x2 (w8, z18, z23))
+
+/*
+** mls_w8_z4_z28:
+**	fmls	za\.h\[w8, 0, vgx2\], {z4\.h - z5\.h}, {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z4_z28, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8, z4, z28),
+	    svmls_za16_vg1x2 (w8, z4, z28))
+
+/*
+** mls_w8p7_z4_z0:
+**	fmls	za\.h\[w8, 7, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8 + 7, z4, z0),
+	    svmls_za16_vg1x2 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z4\.h - z5\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8 + 8, z4, z4),
+	    svmls_za16_vg1x2 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fmls	za\.h\[\1, 0, vgx2\], {z4\.h - z5\.h}, {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x2_t,
+	    svmls_za16_f16_vg1x2 (w8 - 1, z4, z0),
+	    svmls_za16_vg1x2 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (0, z1, z0),
+	        svmls_za16_vg1x2 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (w0, z1, z0),
+	        svmls_za16_vg1x2 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+**	fmls	za\.h\[w8, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (w8, z1, z0),
+	        svmls_za16_vg1x2 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+**	fmls	za\.h\[w8, 7, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (w8 + 7, z1, z0),
+	        svmls_za16_vg1x2 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (w8 + 8, z1, z0),
+	        svmls_za16_vg1x2 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmls	za\.h\[\1, 0, vgx2\], {z1\.h - z2\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (w0 - 1, z1, z0),
+	        svmls_za16_vg1x2 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+**	str	d15, \[sp, #?-16\]!
+**	fmls	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}, z15\.h
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x2_t, svfloat16_t,
+		    svmls_single_za16_f16_vg1x2 (w8, z0, z15),
+		    svmls_za16_vg1x2 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+**	mov	(z[0-7]).d, z16.d
+**	fmls	za\.h\[w8, 0, vgx2\], {z20\.h - z21\.h}, \1\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x2_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x2 (w8, z20, z16),
+	        svmls_za16_vg1x2 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c
new file mode 100644
index 00000000000..b0485be14e5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mls_za16_f16_vg1x4.c
@@ -0,0 +1,176 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mls_0_z0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_0_z0_z0, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (0, z0, z0),
+	    svmls_za16_vg1x4 (0, z0, z0))
+
+/*
+** mls_w0_z0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w0_z0_z0, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w0, z0, z0),
+	    svmls_za16_vg1x4 (w0, z0, z0))
+
+/*
+** mls_w8_z0_z4:
+**	fmls	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z0_z4, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8, z0, z4),
+	    svmls_za16_vg1x4 (w8, z0, z4))
+
+/* Leave the assembler to check correctness for misaligned registers.  */
+
+/*
+** mls_w8_z0_z18:
+**	...
+**	fmls	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z0_z18, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8, z0, z18),
+	    svmls_za16_vg1x4 (w8, z0, z18))
+
+/*
+** mls_w8_z18_z28:
+**	...
+**	fmls	za\.h\[w8, 0, vgx4\], [^\n]+, {z28\.h - z31\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z18_z28, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8, z18, z28),
+	    svmls_za16_vg1x4 (w8, z18, z28))
+
+/*
+** mls_w8_z28_z23:
+**	...
+**	fmls	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}, [^\n]+
+**	ret
+*/
+TEST_ZA_XN (mls_w8_z28_z23, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8, z28, z23),
+	    svmls_za16_vg1x4 (w8, z28, z23))
+
+/*
+** mls_w8p7_z4_z0:
+**	fmls	za\.h\[w8, 7, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p7_z4_z0, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8 + 7, z4, z0),
+	    svmls_za16_vg1x4 (w8 + 7, z4, z0))
+
+/*
+** mls_w8p8_z4_z4:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8p8_z4_z4, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8 + 8, z4, z4),
+	    svmls_za16_vg1x4 (w8 + 8, z4, z4))
+
+/*
+** mls_w8m1_z4_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fmls	za\.h\[\1, 0, vgx4\], {z4\.h - z7\.h}, {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (mls_w8m1_z4_z0, svfloat16x4_t,
+	    svmls_za16_f16_vg1x4 (w8 - 1, z4, z0),
+	    svmls_za16_vg1x4 (w8 - 1, z4, z0))
+
+/*
+** mls_single_0_z1_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_0_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (0, z1, z0),
+	        svmls_za16_vg1x4 (0, z1, z0))
+
+/*
+** mls_single_w0_z1_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (w0, z1, z0),
+	        svmls_za16_vg1x4 (w0, z1, z0))
+
+/*
+** mls_single_w8_z1_z0:
+**	fmls	za\.h\[w8, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (w8, z1, z0),
+	        svmls_za16_vg1x4 (w8, z1, z0))
+
+/*
+** mls_single_w8p7_z1_z0:
+**	fmls	za\.h\[w8, 7, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p7_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (w8 + 7, z1, z0),
+	        svmls_za16_vg1x4 (w8 + 7, z1, z0))
+
+/*
+** mls_single_w8p8_z1_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8p8_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (w8 + 8, z1, z0),
+	        svmls_za16_vg1x4 (w8 + 8, z1, z0))
+
+/*
+** mls_single_w0m1_z1_z0:
+**	sub	(w8|w9|w10|w11), w0, #?1
+**	fmls	za\.h\[\1, 0, vgx4\], {z1\.h - z4\.h}, z0\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w0m1_z1_z0, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (w0 - 1, z1, z0),
+	        svmls_za16_vg1x4 (w0 - 1, z1, z0))
+
+/*
+** mls_single_w8_z0_z15:
+**	str	d15, \[sp, #?-16\]!
+**	fmls	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}, z15\.h
+**	ldr	d15, \[sp\], #?16
+**	ret
+*/
+TEST_ZA_SINGLE_Z15 (mls_single_w8_z0_z15, svfloat16x4_t, svfloat16_t,
+		    svmls_single_za16_f16_vg1x4 (w8, z0, z15),
+		    svmls_za16_vg1x4 (w8, z0, z15))
+
+/*
+** mls_single_w8_z20_z16:
+**	mov	(z[0-7]).d, z16.d
+**	fmls	za\.h\[w8, 0, vgx4\], {z20\.h - z23\.h}, \1\.h
+**	ret
+*/
+TEST_ZA_SINGLE (mls_single_w8_z20_z16, svfloat16x4_t, svfloat16_t,
+	        svmls_single_za16_f16_vg1x4 (w8, z20, z16),
+	        svmls_za16_vg1x4 (w8, z20, z16))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c
new file mode 100644
index 00000000000..1219f838e33
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mopa_za16_f16.c
@@ -0,0 +1,34 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
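+
+/* With 16-bit elements ZA provides two tiles, so the tile argument
+   below is restricted to 0 and 1.  */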
+
+/*
+** mopa_za16_f16_0_p0_p1_z0_z1:
+**	fmopa	za0\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_f16_0_p0_p1_z0_z1, svfloat16_t,
+		 svmopa_za16_f16_m (0, p0, p1, z0, z1),
+		 svmopa_za16_m (0, p0, p1, z0, z1))
+
+/*
+** mopa_za16_f16_0_p1_p0_z1_z0:
+**	fmopa	za0\.h, p1/m, p0/m, z1\.h, z0\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_f16_0_p1_p0_z1_z0, svfloat16_t,
+		 svmopa_za16_f16_m (0, p1, p0, z1, z0),
+		 svmopa_za16_m (0, p1, p0, z1, z0))
+
+/*
+** mopa_za16_f16_1_p0_p1_z0_z1:
+**	fmopa	za1\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mopa_za16_f16_1_p0_p1_z0_z1, svfloat16_t,
+		 svmopa_za16_f16_m (1, p0, p1, z0, z1),
+		 svmopa_za16_m (1, p0, p1, z0, z1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c
new file mode 100644
index 00000000000..fe22df6b042
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/mops_za16_f16.c
@@ -0,0 +1,34 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** mops_za16_f16_0_p0_p1_z0_z1:
+**	fmops	za0\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mops_za16_f16_0_p0_p1_z0_z1, svfloat16_t,
+		 svmops_za16_f16_m (0, p0, p1, z0, z1),
+		 svmops_za16_m (0, p0, p1, z0, z1))
+
+/*
+** mops_za16_f16_0_p1_p0_z1_z0:
+**	fmops	za0\.h, p1/m, p0/m, z1\.h, z0\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mops_za16_f16_0_p1_p0_z1_z0, svfloat16_t,
+		 svmops_za16_f16_m (0, p1, p0, z1, z0),
+		 svmops_za16_m (0, p1, p0, z1, z0))
+
+/*
+** mops_za16_f16_1_p0_p1_z0_z1:
+**	fmops	za1\.h, p0/m, p1/m, z0\.h, z1\.h
+**	ret
+*/
+TEST_UNIFORM_ZA (mops_za16_f16_1_p0_p1_z0_z1, svfloat16_t,
+		 svmops_za16_f16_m (1, p0, p1, z0, z1),
+		 svmops_za16_m (1, p0, p1, z0, z1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c
new file mode 100644
index 00000000000..114e85e31f8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x2.c
@@ -0,0 +1,126 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
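+
+/* The ZA slice base must be one of w8-w11, with an immediate offset in
+   the range [0, 7]; the tests check that other bases and offsets are
+   first moved or adjusted into that form.  */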
+
+/*
+** sub_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (0, z0),
+	    svsub_za16_vg1x2 (0, z0))
+
+/*
+** sub_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w0, z0),
+	    svsub_za16_vg1x2 (w0, z0))
+
+/*
+** sub_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w7, z0),
+	    svsub_za16_vg1x2 (w7, z0))
+
+/*
+** sub_w8_z0:
+**	fsub	za\.h\[w8, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8, z0),
+	    svsub_za16_vg1x2 (w8, z0))
+
+/*
+** sub_w11_z0:
+**	fsub	za\.h\[w11, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w11, z0),
+	    svsub_za16_vg1x2 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w12, z0),
+	    svsub_za16_vg1x2 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+**	fsub	za\.h\[w8, 7, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8 + 7, z0),
+	    svsub_za16_vg1x2 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8 + 8, z0),
+	    svsub_za16_vg1x2 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fsub	za\.h\[\1, 0, vgx2\], {z0\.h - z1\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8 - 1, z0),
+	    svsub_za16_vg1x2 (w8 - 1, z0))
+
+/*
+** sub_w8_z18:
+**	fsub	za\.h\[w8, 0, vgx2\], {z18\.h - z19\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8, z18),
+	    svsub_za16_vg1x2 (w8, z18))
+
+/* Leave the assembler to check correctness for misaligned registers.  */
+
+/*
+** sub_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fsub	za\.h\[w8, 0, vgx2\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8, z23),
+	    svsub_za16_vg1x2 (w8, z23))
+
+/*
+** sub_w8_z28:
+**	fsub	za\.h\[w8, 0, vgx2\], {z28\.h - z29\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat16x2_t,
+	    svsub_za16_f16_vg1x2 (w8, z28),
+	    svsub_za16_vg1x2 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c
new file mode 100644
index 00000000000..c007ed5bace
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sme2/acle-asm/sub_za16_f16_vg1x4.c
@@ -0,0 +1,141 @@ 
+/* { dg-do assemble { target aarch64_asm_sme-f16f16_ok } } */
+/* { dg-do compile { target { ! aarch64_asm_sme-f16f16_ok } } } */
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sme2_acle.h"
+
+#pragma GCC target "+sme-f16f16"
+
+/*
+** sub_0_z0:
+**	mov	(w8|w9|w10|w11), #?0
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_0_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (0, z0),
+	    svsub_za16_vg1x4 (0, z0))
+
+/*
+** sub_w0_z0:
+**	mov	(w8|w9|w10|w11), w0
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w0_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w0, z0),
+	    svsub_za16_vg1x4 (w0, z0))
+
+/*
+** sub_w7_z0:
+**	mov	(w8|w9|w10|w11), w7
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w7_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w7, z0),
+	    svsub_za16_vg1x4 (w7, z0))
+
+/*
+** sub_w8_z0:
+**	fsub	za\.h\[w8, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8, z0),
+	    svsub_za16_vg1x4 (w8, z0))
+
+/*
+** sub_w11_z0:
+**	fsub	za\.h\[w11, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w11_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w11, z0),
+	    svsub_za16_vg1x4 (w11, z0))
+
+
+/*
+** sub_w12_z0:
+**	mov	(w8|w9|w10|w11), w12
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w12_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w12, z0),
+	    svsub_za16_vg1x4 (w12, z0))
+
+/*
+** sub_w8p7_z0:
+**	fsub	za\.h\[w8, 7, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p7_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8 + 7, z0),
+	    svsub_za16_vg1x4 (w8 + 7, z0))
+
+/*
+** sub_w8p8_z0:
+**	add	(w8|w9|w10|w11), w8, #?8
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8p8_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8 + 8, z0),
+	    svsub_za16_vg1x4 (w8 + 8, z0))
+
+/*
+** sub_w8m1_z0:
+**	sub	(w8|w9|w10|w11), w8, #?1
+**	fsub	za\.h\[\1, 0, vgx4\], {z0\.h - z3\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8m1_z0, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8 - 1, z0),
+	    svsub_za16_vg1x4 (w8 - 1, z0))
+
+/*
+** sub_w8_z4:
+**	fsub	za\.h\[w8, 0, vgx4\], {z4\.h - z7\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z4, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8, z4),
+	    svsub_za16_vg1x4 (w8, z4))
+
+/* Leave the assembler to check correctness for misaligned registers.  */
+
+/*
+** sub_w8_z18:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fsub	za\.h\[w8, 0, vgx4\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z18, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8, z18),
+	    svsub_za16_vg1x4 (w8, z18))
+
+/*
+** sub_w8_z23:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	fsub	za\.h\[w8, 0, vgx4\], [^\n]+
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z23, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8, z23),
+	    svsub_za16_vg1x4 (w8, z23))
+
+/*
+** sub_w8_z28:
+**	fsub	za\.h\[w8, 0, vgx4\], {z28\.h - z31\.h}
+**	ret
+*/
+TEST_ZA_XN (sub_w8_z28, svfloat16x4_t,
+	    svsub_za16_f16_vg1x4 (w8, z28),
+	    svsub_za16_vg1x4 (w8, z28))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 6c966a188de..e9112c02b3e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -764,4 +764,20 @@ 
 		    "w" (z25));					\
   }
 
+#define TEST_X2_WIDE(NAME, TTYPE, ZTYPE, CODE1, CODE2)		\
+  PROTO (NAME, void, ())					\
+  {								\
+    register ZTYPE z0 __asm ("z0");				\
+    register ZTYPE z5 __asm ("z5");				\
+    register TTYPE z6 __asm ("z6");				\
+    register TTYPE z16 __asm ("z16");				\
+    register ZTYPE z22 __asm ("z22");				\
+    register TTYPE z29 __asm ("z29");				\
+    register TTYPE z0_res __asm ("z0");				\
+    __asm volatile ("" : "=w" (z0), "=w" (z5), "=w" (z22));	\
+    INVOKE (CODE1, CODE2);					\
+    __asm volatile ("" :: "w" (z0_res), "w" (z5), "w" (z6),	\
+		    "w" (z16), "w" (z22), "w" (z29));		\
+  }
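+
+/* TEST_X2_WIDE above exercises widening two-vector forms: CODE1 and
+   CODE2 convert the single ZTYPE vector into a TTYPE (two-vector)
+   result.  The "register ... __asm" declarations pin each operand to a
+   fixed Z register, and the empty volatile asms make the inputs live on
+   entry and the results used on exit, so that check-function-bodies
+   sees a predictable register allocation.  */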
+
 #endif
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 44671adea12..3abe41e8597 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -12121,7 +12121,8 @@  proc check_effective_target_aarch64_tiny { } {
 
 foreach { aarch64_ext } { "fp" "simd" "crypto" "crc" "lse" "dotprod" "sve"
 			  "i8mm" "f32mm" "f64mm" "bf16" "sb" "sve2" "ls64"
-			  "sme" "sme-i16i64" "sme2" "sve-b16b16" } {
+			  "sme" "sme-i16i64" "sme2" "sve-b16b16"
+			  "sme-f16f16" } {
     eval [string map [list FUNC $aarch64_ext] {
 	proc check_effective_target_aarch64_asm_FUNC_ok { } {
 	  if { [istarget aarch64*-*-*] } {