@@ -48,18 +48,21 @@ ADJUST_FLOAT_FORMAT (HF, &ieee_half_format);
/* Vector modes. */
+VECTOR_BOOL_MODE (VNx64BI, 64, BI, 8);
VECTOR_BOOL_MODE (VNx32BI, 32, BI, 4);
VECTOR_BOOL_MODE (VNx16BI, 16, BI, 2);
VECTOR_BOOL_MODE (VNx8BI, 8, BI, 2);
VECTOR_BOOL_MODE (VNx4BI, 4, BI, 2);
VECTOR_BOOL_MODE (VNx2BI, 2, BI, 2);
+ADJUST_NUNITS (VNx64BI, aarch64_sve_vg * 32);
ADJUST_NUNITS (VNx32BI, aarch64_sve_vg * 16);
ADJUST_NUNITS (VNx16BI, aarch64_sve_vg * 8);
ADJUST_NUNITS (VNx8BI, aarch64_sve_vg * 4);
ADJUST_NUNITS (VNx4BI, aarch64_sve_vg * 2);
ADJUST_NUNITS (VNx2BI, aarch64_sve_vg);
+ADJUST_ALIGNMENT (VNx64BI, 2);
ADJUST_ALIGNMENT (VNx32BI, 2);
ADJUST_ALIGNMENT (VNx16BI, 2);
ADJUST_ALIGNMENT (VNx8BI, 2);
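(As a worked check of the new entries, an aside rather than part of the patch: a predicate register holds one bit per vector byte, and aarch64_sve_vg is the number of 64-bit granules in a vector, i.e. VL / 64.  So VNx64BI has 32 * vg = VL / 2 BI elements, the contents of four predicate registers of VL / 8 bits each; at the minimum VL of 128 bits that is 64 elements in 8 bytes, matching the VECTOR_BOOL_MODE arguments above, just as VNx32BI and VNx16BI cover two predicate registers and one.)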
@@ -1045,7 +1045,7 @@ rtx aarch64_simd_expand_builtin (int, tree, rtx);
void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
rtx aarch64_endian_lane_rtx (machine_mode, unsigned int);
-void aarch64_split_double_move (rtx, rtx, machine_mode);
+void aarch64_split_move (rtx, rtx, machine_mode);
void aarch64_split_128bit_move (rtx, rtx);
bool aarch64_split_128bit_move_p (rtx, rtx);
@@ -74,6 +74,7 @@ DEF_SVE_FUNCTION (svcreate2, create, all_data, none)
DEF_SVE_FUNCTION (svcreate2, create, b, none)
DEF_SVE_FUNCTION (svcreate3, create, all_data, none)
DEF_SVE_FUNCTION (svcreate4, create, all_data, none)
+DEF_SVE_FUNCTION (svcreate4, create, b, none)
DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt, mxz)
DEF_SVE_FUNCTION (svdiv, binary_opt_n, all_float_and_sd_integer, mxz)
DEF_SVE_FUNCTION (svdivr, binary_opt_n, all_float_and_sd_integer, mxz)
@@ -96,6 +97,7 @@ DEF_SVE_FUNCTION (svget2, get, all_data, none)
DEF_SVE_FUNCTION (svget2, get, b, none)
DEF_SVE_FUNCTION (svget3, get, all_data, none)
DEF_SVE_FUNCTION (svget4, get, all_data, none)
+DEF_SVE_FUNCTION (svget4, get, b, none)
DEF_SVE_FUNCTION (svindex, binary_scalar, all_integer, none)
DEF_SVE_FUNCTION (svinsr, binary_n, all_data, none)
DEF_SVE_FUNCTION (svlasta, reduction, all_data, implicit)
@@ -223,6 +225,7 @@ DEF_SVE_FUNCTION (svset2, set, all_data, none)
DEF_SVE_FUNCTION (svset2, set, b, none)
DEF_SVE_FUNCTION (svset3, set, all_data, none)
DEF_SVE_FUNCTION (svset4, set, all_data, none)
+DEF_SVE_FUNCTION (svset4, set, b, none)
DEF_SVE_FUNCTION (svsplice, binary, all_data, implicit)
DEF_SVE_FUNCTION (svsqrt, unary, all_float, mxz)
DEF_SVE_FUNCTION (svst1, storexn, all_data, implicit)
@@ -245,6 +248,7 @@ DEF_SVE_FUNCTION (svundef2, inherent, all_data, none)
DEF_SVE_FUNCTION (svundef2, inherent, b, none)
DEF_SVE_FUNCTION (svundef3, inherent, all_data, none)
DEF_SVE_FUNCTION (svundef4, inherent, all_data, none)
+DEF_SVE_FUNCTION (svundef4, inherent, b, none)
DEF_SVE_FUNCTION (svunpkhi, unary_widen, hsd_integer, none)
DEF_SVE_FUNCTION (svunpkhi, unary_widen, b, none)
DEF_SVE_FUNCTION (svunpklo, unary_widen, hsd_integer, none)
@@ -4697,7 +4697,7 @@ handle_arm_sve_h (bool function_nulls_p)
register_vector_type (type);
if (type != VECTOR_TYPE_svcount_t)
for (unsigned int count = 2; count <= MAX_TUPLE_SIZE; ++count)
- if (type != VECTOR_TYPE_svbool_t || count == 2)
+ if (type != VECTOR_TYPE_svbool_t || count == 2 || count == 4)
register_tuple_type (count, type);
}
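With svbool_t now accepted for count == 4 as well, svboolx4_t gets the same create/get/set/undef interface as the other tuple types.  A minimal usage sketch of the forms registered above (hypothetical user code, assuming a compiler with this patch and SVE enabled, e.g. -march=armv8.2-a+sve):

#include <arm_sve.h>

svbool_t
pick_last (svbool_t a, svbool_t b, svbool_t c, svbool_t d)
{
  svboolx4_t t = svcreate4_b (a, b, c, d);   /* build a four-predicate tuple */
  t = svset4_b (t, 0, svptrue_b8 ());        /* replace element 0 */
  return svget4_b (t, 3);                    /* extract element 3, i.e. d */
}

The function name is illustrative; the intrinsics themselves (svcreate4_b, svset4_b, svget4_b and their overloaded forms) are the ones added in the .def hunks above.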
@@ -1074,9 +1074,9 @@ (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
;; ---- Moves of multiple predicates
;; -------------------------------------------------------------------------
-(define_insn_and_split "movvnx32bi"
- [(set (match_operand:VNx32BI 0 "nonimmediate_operand")
- (match_operand:VNx32BI 1 "aarch64_mov_operand"))]
+(define_insn_and_split "mov<mode>"
+ [(set (match_operand:SVE_STRUCT_BI 0 "nonimmediate_operand")
+ (match_operand:SVE_STRUCT_BI 1 "aarch64_mov_operand"))]
"TARGET_SVE"
{@ [ cons: =0 , 1 ]
[ Upa , Upa ] #
@@ -1086,7 +1086,7 @@ (define_insn_and_split "movvnx32bi"
"&& reload_completed"
[(const_int 0)]
{
- aarch64_split_double_move (operands[0], operands[1], VNx16BImode);
+ aarch64_split_move (operands[0], operands[1], VNx16BImode);
DONE;
}
)
@@ -969,7 +969,7 @@ pure_scalable_type_info::piece::get_rtx (unsigned int first_zr,
if (num_zr > 0 && num_pr == 0)
return gen_rtx_REG (mode, first_zr);
- if (num_zr == 0 && num_pr <= 2)
+ if (num_zr == 0 && num_pr > 0)
return gen_rtx_REG (mode, first_pr);
gcc_unreachable ();
@@ -1684,6 +1684,7 @@ aarch64_classify_vector_mode (machine_mode mode, bool any_target_p = false)
return (TARGET_FLOAT || any_target_p) ? VEC_ADVSIMD : 0;
case E_VNx32BImode:
+ case E_VNx64BImode:
return TARGET_SVE ? VEC_SVE_PRED | VEC_STRUCT : 0;
default:
@@ -1815,13 +1816,15 @@ aarch64_array_mode (machine_mode mode, unsigned HOST_WIDE_INT nelems)
{
if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
{
- /* Use VNx32BI for pairs of predicates, but explicitly reject giving
- a mode to other array sizes. Using integer modes requires a round
- trip through memory and generates terrible code. */
+ /* Use VNx32BI and VNx64BI for tuples of predicates, but explicitly
+ reject giving a mode to other array sizes. Using integer modes
+ requires a round trip through memory and generates terrible code. */
if (nelems == 1)
return mode;
if (mode == VNx16BImode && nelems == 2)
return VNx32BImode;
+ if (mode == VNx16BImode && nelems == 4)
+ return VNx64BImode;
return BLKmode;
}
@@ -2094,7 +2097,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
case PR_REGS:
case PR_LO_REGS:
case PR_HI_REGS:
- return mode == VNx32BImode ? 2 : 1;
+ return mode == VNx64BImode ? 4 : mode == VNx32BImode ? 2 : 1;
case MOVEABLE_SYSREGS:
case FFR_REGS:
@@ -3270,31 +3273,30 @@ aarch64_emit_binop (rtx dest, optab binoptab, rtx op0, rtx op1)
emit_move_insn (dest, tmp);
}
-/* Split a move from SRC to DST into two moves of mode SINGLE_MODE. */
+/* Split a move from SRC to DST into multiple moves of mode SINGLE_MODE. */
void
-aarch64_split_double_move (rtx dst, rtx src, machine_mode single_mode)
+aarch64_split_move (rtx dst, rtx src, machine_mode single_mode)
{
machine_mode mode = GET_MODE (dst);
+ auto npieces = exact_div (GET_MODE_SIZE (mode),
+ GET_MODE_SIZE (single_mode)).to_constant ();
+ auto_vec<rtx, 4> dst_pieces, src_pieces;
- rtx dst0 = simplify_gen_subreg (single_mode, dst, mode, 0);
- rtx dst1 = simplify_gen_subreg (single_mode, dst, mode,
- GET_MODE_SIZE (single_mode));
- rtx src0 = simplify_gen_subreg (single_mode, src, mode, 0);
- rtx src1 = simplify_gen_subreg (single_mode, src, mode,
- GET_MODE_SIZE (single_mode));
-
- /* At most one pairing may overlap. */
- if (reg_overlap_mentioned_p (dst0, src1))
+ for (unsigned int i = 0; i < npieces; ++i)
{
- aarch64_emit_move (dst1, src1);
- aarch64_emit_move (dst0, src0);
+ auto off = i * GET_MODE_SIZE (single_mode);
+ dst_pieces.safe_push (simplify_gen_subreg (single_mode, dst, mode, off));
+ src_pieces.safe_push (simplify_gen_subreg (single_mode, src, mode, off));
}
+
+  /* If the first destination piece overlaps the source, emit the moves
+     in reverse order, so that no source piece is clobbered before it
+     has been read.  */
+ if (reg_overlap_mentioned_p (dst_pieces[0], src))
+ for (unsigned int i = npieces; i-- > 0;)
+ aarch64_emit_move (dst_pieces[i], src_pieces[i]);
else
- {
- aarch64_emit_move (dst0, src0);
- aarch64_emit_move (dst1, src1);
- }
+ for (unsigned int i = 0; i < npieces; ++i)
+ aarch64_emit_move (dst_pieces[i], src_pieces[i]);
}
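The ordering rule is the same one any piecewise overlapping copy uses: when the destination starts inside the source, copy the pieces from last to first.  A stand-alone sketch of the idea in plain C (an analogy only, not GCC code; the real function works on SINGLE_MODE subregs of register tuples, so pieces never partially overlap):

#include <stddef.h>
#include <string.h>

/* Copy NPIECES pieces of PIECE_SIZE bytes each from SRC to DST, choosing
   the direction so that an overlapping destination never clobbers a
   source piece before that piece has been read.  memmove keeps each
   individual piece copy well defined even if DST and SRC are not offset
   by a whole number of pieces.  */
void
split_move (unsigned char *dst, const unsigned char *src,
	    size_t piece_size, size_t npieces)
{
  if (dst > src && dst < src + piece_size * npieces)
    /* DST starts inside SRC: copy the pieces in reverse order.  */
    for (size_t i = npieces; i-- > 0;)
      memmove (dst + i * piece_size, src + i * piece_size, piece_size);
  else
    for (size_t i = 0; i < npieces; ++i)
      memmove (dst + i * piece_size, src + i * piece_size, piece_size);
}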
/* Split a 128-bit move operation into two 64-bit move operations,
@@ -3338,7 +3340,7 @@ aarch64_split_128bit_move (rtx dst, rtx src)
}
}
- aarch64_split_double_move (dst, src, word_mode);
+ aarch64_split_move (dst, src, word_mode);
}
/* Return true if we should split a move from 128-bit value SRC
@@ -13172,7 +13174,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
case PR_REGS:
case PR_LO_REGS:
case PR_HI_REGS:
- return mode == VNx32BImode ? 2 : 1;
+ return mode == VNx64BImode ? 4 : mode == VNx32BImode ? 2 : 1;
case MOVEABLE_SYSREGS:
case STACK_REG:
@@ -556,6 +556,8 @@ (define_mode_iterator SVE_FULLx24 [SVE_FULLx2 SVE_FULLx4])
;; All SVE vector structure modes.
(define_mode_iterator SVE_STRUCT [SVE_FULLx2 SVE_FULLx3 SVE_FULLx4])
+(define_mode_iterator SVE_STRUCT_BI [VNx32BI VNx64BI])
+
;; All SVE vector and structure modes.
(define_mode_iterator SVE_ALL_STRUCT [SVE_ALL SVE_STRUCT])
@@ -145,3 +145,13 @@ TEST_CREATE (create4_u64, svuint64x4_t, svuint64_t,
TEST_CREATE (create4_f64, svfloat64x4_t, svfloat64_t,
z0 = svcreate4_f64 (z5, z4, z7, z6),
z0 = svcreate4 (z5, z4, z7, z6))
+
+/* This is awkward to code-generate, so don't match a particular output. */
+TEST_CREATE_B (create4_b_0, svboolx4_t,
+ p0_res = svcreate4_b (p0, p1, p2, p3),
+ p0_res = svcreate4 (p0, p1, p2, p3))
+
+/* This is awkward to code-generate, so don't match a particular output. */
+TEST_CREATE_B (create4_b_1, svboolx4_t,
+ p0_res = svcreate4_b (p3, p2, p1, p0),
+ p0_res = svcreate4 (p3, p2, p1, p0))
new file mode 100644
@@ -0,0 +1,73 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** get4_b_p0_0:
+** mov p0\.b, p4\.b
+** ret
+*/
+TEST_GET_B (get4_b_p0_0, svboolx4_t,
+ p0 = svget4_b (p4, 0),
+ p0 = svget4 (p4, 0))
+
+/*
+** get4_b_p0_1:
+** mov p0\.b, p5\.b
+** ret
+*/
+TEST_GET_B (get4_b_p0_1, svboolx4_t,
+ p0 = svget4_b (p4, 1),
+ p0 = svget4 (p4, 1))
+
+/*
+** get4_b_p0_2:
+** mov p0\.b, p6\.b
+** ret
+*/
+TEST_GET_B (get4_b_p0_2, svboolx4_t,
+ p0 = svget4_b (p4, 2),
+ p0 = svget4 (p4, 2))
+
+/*
+** get4_b_p0_3:
+** mov p0\.b, p7\.b
+** ret
+*/
+TEST_GET_B (get4_b_p0_3, svboolx4_t,
+ p0 = svget4_b (p4, 3),
+ p0 = svget4 (p4, 3))
+
+/*
+** get4_b_p4_0:
+** ret
+*/
+TEST_GET_B (get4_b_p4_0, svboolx4_t,
+ p4_res = svget4_b (p4, 0),
+ p4_res = svget4 (p4, 0))
+
+/*
+** get4_b_p4_3:
+** mov p4\.b, p7\.b
+** ret
+*/
+TEST_GET_B (get4_b_p4_3, svboolx4_t,
+ p4_res = svget4_b (p4, 3),
+ p4_res = svget4 (p4, 3))
+
+/*
+** get4_b_p5_0:
+** mov p5\.b, p4\.b
+** ret
+*/
+TEST_GET_B (get4_b_p5_0, svboolx4_t,
+ p5_res = svget4_b (p4, 0),
+ p5_res = svget4 (p4, 0))
+
+/*
+** get4_b_p5_1:
+** ret
+*/
+TEST_GET_B (get4_b_p5_1, svboolx4_t,
+ p5_res = svget4_b (p4, 1),
+ p5_res = svget4 (p4, 1))
new file mode 100644
@@ -0,0 +1,87 @@
+/* { dg-final { check-function-bodies "**" "" "-DCHECK_ASM" } } */
+
+#include "test_sve_acle.h"
+
+/*
+** set4_b_p8_0:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov p8\.b, p0\.b
+** ret
+*/
+TEST_SET_B (set4_b_p8_0, svboolx4_t,
+ p8 = svset4_b (p4, 0, p0),
+ p8 = svset4 (p4, 0, p0))
+
+/*
+** set4_b_p8_1:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov p9\.b, p0\.b
+** ret
+*/
+TEST_SET_B (set4_b_p8_1, svboolx4_t,
+ p8 = svset4_b (p4, 1, p0),
+ p8 = svset4 (p4, 1, p0))
+
+/*
+** set4_b_p8_2:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov p10\.b, p0\.b
+** ret
+*/
+TEST_SET_B (set4_b_p8_2, svboolx4_t,
+ p8 = svset4_b (p4, 2, p0),
+ p8 = svset4 (p4, 2, p0))
+
+/*
+** set4_b_p8_3:
+** mov [^\n]+
+** mov [^\n]+
+** mov [^\n]+
+** mov p11\.b, p0\.b
+** ret
+*/
+TEST_SET_B (set4_b_p8_3, svboolx4_t,
+ p8 = svset4_b (p4, 3, p0),
+ p8 = svset4 (p4, 3, p0))
+
+/*
+** set4_b_p4_0:
+** mov p4\.b, p12\.b
+** ret
+*/
+TEST_SET_B (set4_b_p4_0, svboolx4_t,
+ p4 = svset4_b (p4, 0, p12),
+ p4 = svset4 (p4, 0, p12))
+
+/*
+** set4_b_p4_1:
+** mov p5\.b, p13\.b
+** ret
+*/
+TEST_SET_B (set4_b_p4_1, svboolx4_t,
+ p4 = svset4_b (p4, 1, p13),
+ p4 = svset4 (p4, 1, p13))
+
+/*
+** set4_b_p4_2:
+** mov p6\.b, p12\.b
+** ret
+*/
+TEST_SET_B (set4_b_p4_2, svboolx4_t,
+ p4 = svset4_b (p4, 2, p12),
+ p4 = svset4 (p4, 2, p12))
+
+/*
+** set4_b_p4_3:
+** mov p7\.b, p13\.b
+** ret
+*/
+TEST_SET_B (set4_b_p4_3, svboolx4_t,
+ p4 = svset4_b (p4, 3, p13),
+ p4 = svset4 (p4, 3, p13))
@@ -543,6 +543,14 @@
return z0; \
}
+#define TEST_UNDEF_B(NAME, TYPE, CODE) \
+ PROTO (NAME, TYPE, (void)) \
+ { \
+ TYPE p0; \
+ CODE; \
+ return p0; \
+ }
+
#define TEST_CREATE(NAME, TTYPE, ZTYPE, CODE1, CODE2) \
PROTO (NAME, TTYPE, (ZTYPE unused0, ZTYPE unused1, \
ZTYPE unused2, ZTYPE unused3, \
@@ -85,3 +85,10 @@ TEST_UNDEF (uint64, svuint64x2_t,
*/
TEST_UNDEF (float64, svfloat64x2_t,
z0 = svundef2_f64 ())
+
+/*
+** bools:
+** ret
+*/
+TEST_UNDEF_B (bools, svboolx2_t,
+ p0 = svundef2_b ())
@@ -85,3 +85,10 @@ TEST_UNDEF (uint64, svuint64x4_t,
*/
TEST_UNDEF (float64, svfloat64x4_t,
z0 = svundef4_f64 ())
+
+/*
+** bools:
+** ret
+*/
+TEST_UNDEF_B (bools, svboolx4_t,
+ p0 = svundef4_b ())
@@ -17,7 +17,7 @@ f1 (svint32x4_t *ptr, svbool_t pg, svint32_t s32, svfloat64_t f64,
*ptr = svcreate4 (s32, x, s32, s32); /* { dg-error {passing 'int' to argument 2 of 'svcreate4', which expects an SVE type rather than a scalar} } */
*ptr = svcreate4 (x, s32, s32, s32); /* { dg-error {passing 'int' to argument 1 of 'svcreate4', which expects an SVE type rather than a scalar} } */
*ptr = svcreate4 (pg, s32, s32, s32); /* { dg-error {passing 'svint32_t' to argument 2 of 'svcreate4', but argument 1 had type 'svbool_t'} } */
- *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {'svcreate4' has no form that takes 'svbool_t' arguments} } */
+ *ptr = svcreate4 (pg, pg, pg, pg); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svboolx4_t'} } */
*ptr = svcreate4 (s32, s32, s32, s32);
*ptr = svcreate4 (f64, f64, f64, f64); /* { dg-error {incompatible types when assigning to type 'svint32x4_t' from type 'svfloat64x4_t'} } */
}
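(The expected diagnostic changes because svcreate4 now does have a form that takes svbool_t arguments: the call itself is valid, so the error moves to the assignment of the resulting svboolx4_t to an svint32x4_t object, mirroring the existing svfloat64x4_t case below it.)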
new file mode 100644
@@ -0,0 +1,117 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#include <arm_sve.h>
+
+/*
+** ret_p0:
+** ret
+*/
+svboolx4_t
+ret_p0 (svboolx4_t p0)
+{
+ return p0;
+}
+
+/*
+** ret_p1:
+** addvl sp, sp, #-1
+** str p4, \[sp\]
+** mov p0\.b, p1\.b
+** mov p1\.b, p2\.b
+** mov p2\.b, p3\.b
+** mov p3\.b, p4\.b
+** ldr p4, \[sp\]
+** addvl sp, sp, #1
+** ret
+*/
+svboolx4_t
+ret_p1 (void)
+{
+ register svboolx4_t p1 asm ("p1");
+ asm volatile ("" : "=Upa" (p1));
+ return p1;
+}
+
+/*
+** ret_mem:
+** (
+** ldr p0, \[x0\]
+** ldr p1, \[x0, #1, mul vl\]
+** ldr p2, \[x0, #2, mul vl\]
+** ldr p3, \[x0, #3, mul vl\]
+** |
+** ldr p3, \[x0, #3, mul vl\]
+** ldr p2, \[x0, #2, mul vl\]
+** ldr p1, \[x0, #1, mul vl\]
+** ldr p0, \[x0\]
+** )
+** ret
+*/
+svboolx4_t
+ret_mem (svboolx4_t p0, svboolx4_t mem)
+{
+ return mem;
+}
+
+/*
+** load:
+** (
+** ldr p0, \[x0\]
+** ldr p1, \[x0, #1, mul vl\]
+** ldr p2, \[x0, #2, mul vl\]
+** ldr p3, \[x0, #3, mul vl\]
+** |
+**	ldr	p3, \[x0, #3, mul vl\]
+**	ldr	p2, \[x0, #2, mul vl\]
+** ldr p1, \[x0, #1, mul vl\]
+** ldr p0, \[x0\]
+** )
+** ret
+*/
+svboolx4_t
+load (svboolx4_t *ptr)
+{
+ return *ptr;
+}
+
+/*
+** store:
+** (
+** str p0, \[x0\]
+** str p1, \[x0, #1, mul vl\]
+** str p2, \[x0, #2, mul vl\]
+** str p3, \[x0, #3, mul vl\]
+** |
+** str p3, \[x0, #3, mul vl\]
+** str p2, \[x0, #2, mul vl\]
+** str p1, \[x0, #1, mul vl\]
+** str p0, \[x0\]
+** )
+** ret
+*/
+void
+store (svboolx4_t p0, svboolx4_t *ptr)
+{
+ *ptr = p0;
+}
+
+/*
+** p0_to_p1:
+** addvl sp, sp, #-1
+** str p4, \[sp\]
+** mov p4\.b, p3\.b
+** mov p3\.b, p2\.b
+** mov p2\.b, p1\.b
+** mov p1\.b, p0\.b
+** ldr p4, \[sp\]
+** addvl sp, sp, #1
+** ret
+*/
+void
+p0_to_p1 (svboolx4_t p0)
+{
+ register svboolx4_t p1 asm ("p1") = p0;
+ asm volatile ("" :: "Upa" (p1));
+}