diff mbox series

[10/15] aarch64: Add svboolx4_t

Message ID mptttck8br5.fsf@arm.com
State New
Headers show
Series aarch64: Add support for SVE2.1 | expand

Commit Message

Richard Sandiford Nov. 6, 2024, 6:20 p.m. UTC
This patch adds an svboolx4_t type, to go alongside the existing
svboolx2_t type.  It doesn't require any special ISA support beyond
SVE itself and it currently has no associated instructions.

gcc/
	* config/aarch64/aarch64-modes.def (VNx64BI): New mode.
	* config/aarch64/aarch64-protos.h
	(aarch64_split_double_move): Generalize to...
	(aarch64_split_move): ...this.
	* config/aarch64/aarch64-sve-builtins-base.def (svcreate4, svget4)
	(svset4, svundef4): Add bool variants.
	* config/aarch64/aarch64-sve-builtins.cc (handle_arm_sve_h): Add
	svboolx4_t.
	* config/aarch64/iterators.md (SVE_STRUCT_BI): New mode iterator.
	* config/aarch64/aarch64-sve.md (movvnx32bi): Generalize to...
	(mov<SVE_STRUCT_BI:mode>): ...this.
	* config/aarch64/aarch64.cc
	(pure_scalable_type_info::piece::get_rtx): Allow num_prs to be 4.
	(aarch64_classify_vector_mode): Handle VNx64BI.
	(aarch64_hard_regno_nregs): Likewise.
	(aarch64_class_max_nregs): Likewise.
	(aarch64_array_mode): Use VNx64BI for arrays of 4 svbool_ts.
	(aarch64_split_double_move): Generalize to...
	(aarch64_split_move): ...this.
	(aarch64_split_128bit_move): Update call accordingly.

gcc/testsuite/
	* gcc.target/aarch64/sve/acle/general-c/create_5.c: Expect svcreate4
	to succeed for svbool_ts.
	* gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
	(TEST_UNDEF_B): New macro.
	* gcc.target/aarch64/sve/acle/asm/create4_1.c: Test _b form.
	* gcc.target/aarch64/sve/acle/asm/undef2_1.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/undef4_1.c: Likewise.
	* gcc.target/aarch64/sve/acle/asm/get4_b.c: New test.
	* gcc.target/aarch64/sve/acle/asm/set4_b.c: Likewise.
	* gcc.target/aarch64/sve/acle/general-c/svboolx4_1.c: Likewise.
---
 gcc/config/aarch64/aarch64-modes.def          |   3 +
 gcc/config/aarch64/aarch64-protos.h           |   2 +-
 .../aarch64/aarch64-sve-builtins-base.def     |   4 +
 gcc/config/aarch64/aarch64-sve-builtins.cc    |   2 +-
 gcc/config/aarch64/aarch64-sve.md             |   8 +-
 gcc/config/aarch64/aarch64.cc                 |  50 ++++----
 gcc/config/aarch64/iterators.md               |   2 +
 .../aarch64/sve/acle/asm/create4_1.c          |  10 ++
 .../gcc.target/aarch64/sve/acle/asm/get4_b.c  |  73 +++++++++++
 .../gcc.target/aarch64/sve/acle/asm/set4_b.c  |  87 +++++++++++++
 .../aarch64/sve/acle/asm/test_sve_acle.h      |   8 ++
 .../aarch64/sve/acle/asm/undef2_1.c           |   7 ++
 .../aarch64/sve/acle/asm/undef4_1.c           |   7 ++
 .../aarch64/sve/acle/general-c/create_5.c     |   2 +-
 .../aarch64/sve/acle/general-c/svboolx4_1.c   | 117 ++++++++++++++++++
 15 files changed, 351 insertions(+), 31 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_b.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx4_1.c
diff mbox series

Patch

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 25a22c1195e..813421e1e39 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -48,18 +48,21 @@  ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
 
 /* Vector modes.  */
 
+VECTOR_BOOL_MODE (VNx64BI, 64, BI, 8);
 VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4);
 VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
 VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
 VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
 VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
 
+ADJUST_NUNITS (VNx64BI, aarch64_sve_vg * 32);
 ADJUST_NUNITS (VNx32BI, aarch64_sve_vg * 16);
 ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
 ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
 ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
 ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
 
+ADJUST_ALIGNMENT (VNx64BI, 2);
 ADJUST_ALIGNMENT (VNx32BI, 2);
 ADJUST_ALIGNMENT (VNx16BI, 2);
 ADJUST_ALIGNMENT (VNx8BI, 2);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e8588e1cb17..660e335bf34 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -1045,7 +1045,7 @@  rtx aarch64_simd_expand_builtin (int, tree, rtx);
 void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
 rtx aarch64_endian_lane_rtx (machine_mode, unsigned int);
 
-void aarch64_split_double_move (rtx, rtx, machine_mode);
+void aarch64_split_move (rtx, rtx, machine_mode);
 void aarch64_split_128bit_move (rtx, rtx);
 
 bool aarch64_split_128bit_move_p (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index da2a0e41aa5..0353f56e705 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -74,6 +74,7 @@  DEF_SVE_FUNCTION (svcreate2, create, all_data, none)
 DEF_SVE_FUNCTION (svcreate2, create, b, none)
 DEF_SVE_FUNCTION (svcreate3, create, all_data, none)
 DEF_SVE_FUNCTION (svcreate4, create, all_data, none)
+DEF_SVE_FUNCTION (svcreate4, create, b, none)
 DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt, mxz)
 DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz)
 DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz)
@@ -96,6 +97,7 @@  DEF_SVE_FUNCTION (svget2, get, all_data, none)
 DEF_SVE_FUNCTION (svget2, get, b, none)
 DEF_SVE_FUNCTION (svget3, get, all_data, none)
 DEF_SVE_FUNCTION (svget4, get, all_data, none)
+DEF_SVE_FUNCTION (svget4, get, b, none)
 DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none)
 DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none)
 DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit)
@@ -223,6 +225,7 @@  DEF_SVE_FUNCTION (svset2, set, all_data, none)
 DEF_SVE_FUNCTION (svset2, set, b, none)
 DEF_SVE_FUNCTION (svset3, set, all_data, none)
 DEF_SVE_FUNCTION (svset4, set, all_data, none)
+DEF_SVE_FUNCTION (svset4, set, b, none)
 DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit)
 DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz)
 DEF_SVE_FUNCTION (svst1, storexn, all_data, implicit)
@@ -245,6 +248,7 @@  DEF_SVE_FUNCTION (svundef2, inherent, all_data, none)
 DEF_SVE_FUNCTION (svundef2, inherent, b, none)
 DEF_SVE_FUNCTION (svundef3, inherent, all_data, none)
 DEF_SVE_FUNCTION (svundef4, inherent, all_data, none)
+DEF_SVE_FUNCTION (svundef4, inherent, b, none)
 DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none)
 DEF_SVE_FUNCTION (svunpkhi, unary_widen, b, none)
 DEF_SVE_FUNCTION (svunpklo, unary_widen, hsd_integer, none)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 9fb0d6fd416..259e7b7975c 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -4697,7 +4697,7 @@  handle_arm_sve_h (bool function_nulls_p)
       register_vector_type (type);
       if (type != VECTOR_TYPE_svcount_t)
 	for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
-	  if (type != VECTOR_TYPE_svbool_t || count == 2)
+	  if (type != VECTOR_TYPE_svbool_t || count == 2 || count == 4)
 	    register_tuple_type (count, type);
     }
 
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 0955a697680..3d92a2a454f 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1074,9 +1074,9 @@  (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
 ;; ---- Moves of multiple predicates
 ;; -------------------------------------------------------------------------
 
-(define_insn_and_split "movvnx32bi"
-  [(set (match_operand:VNx32BI 0 "nonimmediate_operand")
-	(match_operand:VNx32BI 1 "aarch64_mov_operand"))]
+(define_insn_and_split "mov<mode>"
+  [(set (match_operand:SVE_STRUCT_BI 0 "nonimmediate_operand")
+	(match_operand:SVE_STRUCT_BI 1 "aarch64_mov_operand"))]
   "TARGET_SVE"
   {@ [ cons: =0 , 1   ]
      [ Upa      , Upa ] #
@@ -1086,7 +1086,7 @@  (define_insn_and_split "movvnx32bi"
   "&& reload_completed"
   [(const_int 0)]
   {
-    aarch64_split_double_move (operands[0], operands[1], VNx16BImode);
+    aarch64_split_move (operands[0], operands[1], VNx16BImode);
     DONE;
   }
 )
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9347e06f0e9..e306f86f514 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -969,7 +969,7 @@  pure_scalable_type_info::piece::get_rtx (unsigned int first_zr,
   if (num_zr > 0 && num_pr == 0)
     return gen_rtx_REG (mode, first_zr);
 
-  if (num_zr == 0 && num_pr <= 2)
+  if (num_zr == 0 && num_pr > 0)
     return gen_rtx_REG (mode, first_pr);
 
   gcc_unreachable ();
@@ -1684,6 +1684,7 @@  aarch64_classify_vector_mode (machine_mode mode, bool any_target_p = false)
       return (TARGET_FLOAT || any_target_p) ? VEC_ADVSIMD : 0;
 
     case E_VNx32BImode:
+    case E_VNx64BImode:
       return TARGET_SVE ? VEC_SVE_PRED | VEC_STRUCT : 0;
 
     default:
@@ -1815,13 +1816,15 @@  aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
 {
   if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
     {
-      /* Use VNx32BI for pairs of predicates, but explicitly reject giving
-	 a mode to other array sizes.  Using integer modes requires a round
-	 trip through memory and generates terrible code.  */
+      /* Use VNx32BI and VNx64BI for tuples of predicates, but explicitly
+	 reject giving a mode to other array sizes.  Using integer modes
+	 requires a round trip through memory and generates terrible code.  */
       if (nelems == 1)
 	return mode;
       if (mode == VNx16BImode && nelems == 2)
 	return VNx32BImode;
+      if (mode == VNx16BImode && nelems == 4)
+	return VNx64BImode;
       return BLKmode;
     }
 
@@ -2094,7 +2097,7 @@  aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
-      return mode == VNx32BImode ? 2 : 1;
+      return mode == VNx64BImode ? 4 : mode == VNx32BImode ? 2 : 1;
 
     case MOVEABLE_SYSREGS:
     case FFR_REGS:
@@ -3270,31 +3273,30 @@  aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
     emit_move_insn (dest, tmp);
 }
 
-/* Split a move from SRC to DST into two moves of mode SINGLE_MODE.  */
+/* Split a move from SRC to DST into multiple moves of mode SINGLE_MODE.  */
 
 void
-aarch64_split_double_move (rtx dst, rtx src, machine_mode single_mode)
+aarch64_split_move (rtx dst, rtx src, machine_mode single_mode)
 {
   machine_mode mode = GET_MODE (dst);
+  auto npieces = exact_div (GET_MODE_SIZE (mode),
+			    GET_MODE_SIZE (single_mode)).to_constant ();
+  auto_vec<rtx, 4> dst_pieces, src_pieces;
 
-  rtx dst0 = simplify_gen_subreg (single_mode, dst, mode, 0);
-  rtx dst1 = simplify_gen_subreg (single_mode, dst, mode,
-				  GET_MODE_SIZE (single_mode));
-  rtx src0 = simplify_gen_subreg (single_mode, src, mode, 0);
-  rtx src1 = simplify_gen_subreg (single_mode, src, mode,
-				  GET_MODE_SIZE (single_mode));
-
-  /* At most one pairing may overlap.  */
-  if (reg_overlap_mentioned_p (dst0, src1))
+  for (unsigned int i = 0; i < npieces; ++i)
     {
-      aarch64_emit_move (dst1, src1);
-      aarch64_emit_move (dst0, src0);
+      auto off = i * GET_MODE_SIZE (single_mode);
+      dst_pieces.safe_push (simplify_gen_subreg (single_mode, dst, mode, off));
+      src_pieces.safe_push (simplify_gen_subreg (single_mode, src, mode, off));
     }
+
+  /* At most one pairing may overlap.  */
+  if (reg_overlap_mentioned_p (dst_pieces[0], src))
+    for (unsigned int i = npieces; i-- > 0;)
+      aarch64_emit_move (dst_pieces[i], src_pieces[i]);
   else
-    {
-      aarch64_emit_move (dst0, src0);
-      aarch64_emit_move (dst1, src1);
-    }
+    for (unsigned int i = 0; i < npieces; ++i)
+      aarch64_emit_move (dst_pieces[i], src_pieces[i]);
 }
 
 /* Split a 128-bit move operation into two 64-bit move operations,
@@ -3338,7 +3340,7 @@  aarch64_split_128bit_move (rtx dst, rtx src)
 	}
     }
 
-  aarch64_split_double_move (dst, src, word_mode);
+  aarch64_split_move (dst, src, word_mode);
 }
 
 /* Return true if we should split a move from 128-bit value SRC
@@ -13172,7 +13174,7 @@  aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
     case PR_REGS:
     case PR_LO_REGS:
     case PR_HI_REGS:
-      return mode == VNx32BImode ? 2 : 1;
+      return mode == VNx64BImode ? 4 : mode == VNx32BImode ? 2 : 1;
 
     case MOVEABLE_SYSREGS:
     case STACK_REG:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 4942631aa95..b8924cdc74b 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -556,6 +556,8 @@  (define_mode_iterator SVE_FULLx24 [SVE_FULLx2 SVE_FULLx4])
 ;; All SVE vector structure modes.
 (define_mode_iterator SVE_STRUCT [SVE_FULLx2 SVE_FULLx3 SVE_FULLx4])
 
+(define_mode_iterator SVE_STRUCT_BI [VNx32BI VNx64BI])
+
 ;; All SVE vector and structure modes.
 (define_mode_iterator SVE_ALL_STRUCT [SVE_ALL SVE_STRUCT])
 
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c
index b5ffd4e6aaf..1d2ff4e871d 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/create4_1.c
@@ -145,3 +145,13 @@  TEST_CREATE (create4_u64, svuint64x4_t, svuint64_t,
 TEST_CREATE (create4_f64, svfloat64x4_t, svfloat64_t,
 	     z0 = svcreate4_f64 (z5, z4, z7, z6),
 	     z0 = svcreate4 (z5, z4, z7, z6))
+
+/* This is awkward to code-generate, so don't match a particular output.  */
+TEST_CREATE_B (create4_b_0, svboolx4_t,
+	       p0_res = svcreate4_b (p0, p1, p2, p3),
+	       p0_res = svcreate4 (p0, p1, p2, p3))
+
+/* This is awkward to code-generate, so don't match a particular output.  */
+TEST_CREATE_B (create4_b_1, svboolx4_t,
+	       p0_res = svcreate4_b (p3, p2, p1, p0),
+	       p0_res = svcreate4 (p3, p2, p1, p0))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_b.c
new file mode 100644
index 00000000000..146253aac3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/get4_b.c
@@ -0,0 +1,73 @@ 
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get4_b_p0_0:
+**	mov	p0\.b, p4\.b
+**	ret
+*/
+TEST_GET_B (get4_b_p0_0, svboolx4_t,
+	    p0 = svget4_b (p4, 0),
+	    p0 = svget4 (p4, 0))
+
+/*
+** get4_b_p0_1:
+**	mov	p0\.b, p5\.b
+**	ret
+*/
+TEST_GET_B (get4_b_p0_1, svboolx4_t,
+	    p0 = svget4_b (p4, 1),
+	    p0 = svget4 (p4, 1))
+
+/*
+** get4_b_p0_2:
+**	mov	p0\.b, p6\.b
+**	ret
+*/
+TEST_GET_B (get4_b_p0_2, svboolx4_t,
+	    p0 = svget4_b (p4, 2),
+	    p0 = svget4 (p4, 2))
+
+/*
+** get4_b_p0_3:
+**	mov	p0\.b, p7\.b
+**	ret
+*/
+TEST_GET_B (get4_b_p0_3, svboolx4_t,
+	    p0 = svget4_b (p4, 3),
+	    p0 = svget4 (p4, 3))
+
+/*
+** get4_b_p4_0:
+**	ret
+*/
+TEST_GET_B (get4_b_p4_0, svboolx4_t,
+	    p4_res = svget4_b (p4, 0),
+	    p4_res = svget4 (p4, 0))
+
+/*
+** get4_b_p4_3:
+**	mov	p4\.b, p7\.b
+**	ret
+*/
+TEST_GET_B (get4_b_p4_3, svboolx4_t,
+	    p4_res = svget4_b (p4, 3),
+	    p4_res = svget4 (p4, 3))
+
+/*
+** get4_b_p5_0:
+**	mov	p5\.b, p4\.b
+**	ret
+*/
+TEST_GET_B (get4_b_p5_0, svboolx4_t,
+	    p5_res = svget4_b (p4, 0),
+	    p5_res = svget4 (p4, 0))
+
+/*
+** get4_b_p5_1:
+**	ret
+*/
+TEST_GET_B (get4_b_p5_1, svboolx4_t,
+	    p5_res = svget4_b (p4, 1),
+	    p5_res = svget4 (p4, 1))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_b.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_b.c
new file mode 100644
index 00000000000..13efdf9bc2e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/set4_b.c
@@ -0,0 +1,87 @@ 
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set4_b_p8_0:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	p8\.b, p0\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p8_0, svboolx4_t,
+	    p8 = svset4_b (p4, 0, p0),
+	    p8 = svset4 (p4, 0, p0))
+
+/*
+** set4_b_p8_1:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	p9\.b, p0\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p8_1, svboolx4_t,
+	    p8 = svset4_b (p4, 1, p0),
+	    p8 = svset4 (p4, 1, p0))
+
+/*
+** set4_b_p8_2:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	p10\.b, p0\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p8_2, svboolx4_t,
+	    p8 = svset4_b (p4, 2, p0),
+	    p8 = svset4 (p4, 2, p0))
+
+/*
+** set4_b_p8_3:
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	[^\n]+
+**	mov	p11\.b, p0\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p8_3, svboolx4_t,
+	    p8 = svset4_b (p4, 3, p0),
+	    p8 = svset4 (p4, 3, p0))
+
+/*
+** set4_b_p4_0:
+**	mov	p4\.b, p12\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p4_0, svboolx4_t,
+	    p4 = svset4_b (p4, 0, p12),
+	    p4 = svset4 (p4, 0, p12))
+
+/*
+** set4_b_p4_1:
+**	mov	p5\.b, p13\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p4_1, svboolx4_t,
+	    p4 = svset4_b (p4, 1, p13),
+	    p4 = svset4 (p4, 1, p13))
+
+/*
+** set4_b_p4_2:
+**	mov	p6\.b, p12\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p4_2, svboolx4_t,
+	    p4 = svset4_b (p4, 2, p12),
+	    p4 = svset4 (p4, 2, p12))
+
+/*
+** set4_b_p4_3:
+**	mov	p7\.b, p13\.b
+**	ret
+*/
+TEST_SET_B (set4_b_p4_3, svboolx4_t,
+	    p4 = svset4_b (p4, 3, p13),
+	    p4 = svset4 (p4, 3, p13))
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
index 367024be863..6c966a188de 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/test_sve_acle.h
@@ -543,6 +543,14 @@ 
     return z0;				\
   }
 
+#define TEST_UNDEF_B(NAME, TYPE, CODE)	\
+  PROTO (NAME, TYPE, (void))		\
+  {					\
+    TYPE p0;				\
+    CODE;				\
+    return p0;				\
+  }
+
 #define TEST_CREATE(NAME, TTYPE, ZTYPE, CODE1, CODE2)		\
   PROTO (NAME, TTYPE, (ZTYPE unused0, ZTYPE unused1,		\
 		       ZTYPE unused2, ZTYPE unused3,		\
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c
index fe6c4c7c7d5..2c520df99a3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef2_1.c
@@ -85,3 +85,10 @@  TEST_UNDEF (uint64, svuint64x2_t,
 */
 TEST_UNDEF (float64, svfloat64x2_t,
 	    z0 = svundef2_f64 ())
+
+/*
+** bools:
+**	ret
+*/
+TEST_UNDEF_B (bools, svboolx2_t,
+	      p0 = svundef2_b ())
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c
index 4d6b86b04b5..9bda4d66e89 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/asm/undef4_1.c
@@ -85,3 +85,10 @@  TEST_UNDEF (uint64, svuint64x4_t,
 */
 TEST_UNDEF (float64, svfloat64x4_t,
 	    z0 = svundef4_f64 ())
+
+/*
+** bools:
+**	ret
+*/
+TEST_UNDEF_B (bools, svboolx4_t,
+	      p0 = svundef4_b ())
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
index bf3dd5d7514..687327d7173 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/create_5.c
@@ -17,7 +17,7 @@  f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64,
   *ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */
   *ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */
   *ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but argument 1 had type 'svbool_t'} } */
-  *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */
+  *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svboolx4_t'} } */
   *ptr = svcreate4 (s32, s32, s32, s32);
   *ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */
 }
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx4_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx4_1.c
new file mode 100644
index 00000000000..498c0fa40a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/svboolx4_1.c
@@ -0,0 +1,117 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** ret_p0:
+**	ret
+*/
+svboolx4_t
+ret_p0 (svboolx4_t p0)
+{
+  return p0;
+}
+
+/*
+** ret_p1:
+**	addvl	sp, sp, #-1
+**	str	p4, \[sp\]
+**	mov	p0\.b, p1\.b
+**	mov	p1\.b, p2\.b
+**	mov	p2\.b, p3\.b
+**	mov	p3\.b, p4\.b
+**	ldr	p4, \[sp\]
+**	addvl	sp, sp, #1
+**	ret
+*/
+svboolx4_t
+ret_p1 (void)
+{
+  register svboolx4_t p1 asm ("p1");
+  asm volatile ("" : "=Upa" (p1));
+  return p1;
+}
+
+/*
+** ret_mem:
+** (
+**	ldr	p0, \[x0\]
+**	ldr	p1, \[x0, #1, mul vl\]
+**	ldr	p2, \[x0, #2, mul vl\]
+**	ldr	p3, \[x0, #3, mul vl\]
+** |
+**	ldr	p3, \[x0, #3, mul vl\]
+**	ldr	p2, \[x0, #2, mul vl\]
+**	ldr	p1, \[x0, #1, mul vl\]
+**	ldr	p0, \[x0\]
+** )
+**	ret
+*/
+svboolx4_t
+ret_mem (svboolx4_t p0, svboolx4_t mem)
+{
+  return mem;
+}
+
+/*
+** load:
+** (
+**	ldr	p0, \[x0\]
+**	ldr	p1, \[x0, #1, mul vl\]
+**	ldr	p2, \[x0, #2, mul vl\]
+**	ldr	p3, \[x0, #3, mul vl\]
+** |
+**	ldr	p3, \[x0, #2, mul vl\]
+**	ldr	p2, \[x0, #3, mul vl\]
+**	ldr	p1, \[x0, #1, mul vl\]
+**	ldr	p0, \[x0\]
+** )
+**	ret
+*/
+svboolx4_t
+load (svboolx4_t *ptr)
+{
+  return *ptr;
+}
+
+/*
+** store:
+** (
+**	str	p0, \[x0\]
+**	str	p1, \[x0, #1, mul vl\]
+**	str	p2, \[x0, #2, mul vl\]
+**	str	p3, \[x0, #3, mul vl\]
+** |
+**	str	p3, \[x0, #3, mul vl\]
+**	str	p2, \[x0, #2, mul vl\]
+**	str	p1, \[x0, #1, mul vl\]
+**	str	p0, \[x0\]
+** )
+**	ret
+*/
+void
+store (svboolx4_t p0, svboolx4_t *ptr)
+{
+  *ptr = p0;
+}
+
+/*
+** p0_to_p1:
+**	addvl	sp, sp, #-1
+**	str	p4, \[sp\]
+**	mov	p4\.b, p3\.b
+**	mov	p3\.b, p2\.b
+**	mov	p2\.b, p1\.b
+**	mov	p1\.b, p0\.b
+**	ldr	p4, \[sp\]
+**	addvl	sp, sp, #1
+**	ret
+*/
+void
+p0_to_p1 (svboolx4_t p0)
+{
+  register svboolx4_t p1 asm ("p1") = p0;
+  asm volatile ("" :: "Upa" (p1));
+}