@@ -1933,6 +1933,46 @@ aarch64_sve_int_mode (machine_mode mode)
return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
}
+/* Look for a vector mode with the same classification as VEC_MODE,
+ but with each group of FACTOR elements coalesced into a single element.
+ In other words, look for a mode in which the elements are FACTOR times
+ larger and in which the number of elements is FACTOR times smaller.
+
+ Return the mode found, if one exists. */
+
+static opt_machine_mode
+aarch64_coalesce_units (machine_mode vec_mode, unsigned int factor)
+{
+ auto elt_bits = vector_element_size (GET_MODE_BITSIZE (vec_mode),
+ GET_MODE_NUNITS (vec_mode));
+ auto vec_flags = aarch64_classify_vector_mode (vec_mode);
+ if (vec_flags & VEC_SVE_PRED)
+ {
+ if (known_eq (GET_MODE_SIZE (vec_mode), BYTES_PER_SVE_PRED))
+ return aarch64_sve_pred_mode (elt_bits * factor);
+ return {};
+ }
+
+ scalar_mode new_elt_mode;
+ if (!int_mode_for_size (elt_bits * factor, false).exists (&new_elt_mode))
+ return {};
+
+ if (vec_flags == VEC_ADVSIMD)
+ {
+ auto mode = aarch64_simd_container_mode (new_elt_mode,
+ GET_MODE_BITSIZE (vec_mode));
+ if (mode != word_mode)
+ return mode;
+ }
+ else if (vec_flags & VEC_SVE_DATA)
+ {
+ poly_uint64 new_nunits;
+ if (multiple_p (GET_MODE_NUNITS (vec_mode), factor, &new_nunits))
+ return aarch64_sve_data_mode (new_elt_mode, new_nunits);
+ }
+ return {};
+}
+
/* Implement TARGET_VECTORIZE_RELATED_MODE. */
static opt_machine_mode
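For illustration, here is a minimal standalone sketch (plain C++17, hypothetical struct and function names, not GCC internals) of the arithmetic that the new aarch64_coalesce_units routine applies: the elements become FACTOR times wider and FACTOR times fewer, and the request fails when the element count is not a multiple of FACTOR.

/* Simplified model of the coalescing arithmetic only; the real routine
   works on machine_mode values and poly_uint64 element counts.  */
#include <cstdio>
#include <optional>

struct vec_shape { unsigned elt_bits; unsigned nunits; };

static std::optional<vec_shape>
coalesce_units (vec_shape mode, unsigned factor)
{
  if (factor == 0 || mode.nunits % factor != 0)
    return std::nullopt;
  return vec_shape { mode.elt_bits * factor, mode.nunits / factor };
}

int
main ()
{
  /* A 256-bit vector of sixteen 16-bit elements coalesced by 2 becomes
     a 256-bit vector of eight 32-bit elements.  */
  if (auto m = coalesce_units ({ 16, 16 }, 2))
    std::printf ("%u-bit elements x %u\n", m->elt_bits, m->nunits);
  return 0;
}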
@@ -25731,26 +25771,23 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
{
expand_vec_perm_d newd;
- if (d->vec_flags != VEC_ADVSIMD)
+ /* The subregs that we'd create are not supported for big-endian SVE;
+ see aarch64_modes_compatible_p for details. */
+ if (BYTES_BIG_ENDIAN && (d->vec_flags & VEC_ANY_SVE))
return false;
/* Get the new mode. Always twice the size of the inner
and half the elements. */
- poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
- unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
- auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
- machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
-
- if (new_mode == word_mode)
+ machine_mode new_mode;
+ if (!aarch64_coalesce_units (d->vmode, 2).exists (&new_mode))
return false;
vec_perm_indices newpermindices;
-
if (!newpermindices.new_shrunk_vector (d->perm, 2))
return false;
newd.vmode = new_mode;
- newd.vec_flags = VEC_ADVSIMD;
+ newd.vec_flags = d->vec_flags;
newd.op_mode = newd.vmode;
newd.op_vec_flags = newd.vec_flags;
newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL;
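The rewrite only goes ahead when the selector itself can be expressed in the wider element size, which is what the vec_perm_indices::new_shrunk_vector call above checks. A minimal standalone sketch of that check for illustration (plain C++17, hypothetical helper name, not the GCC implementation): each group of FACTOR narrow indices must name the consecutive, aligned pieces of one wide element, and the surviving index is that wide element's position.

#include <cstdio>
#include <optional>
#include <vector>

static std::optional<std::vector<unsigned>>
shrink_perm (const std::vector<unsigned> &perm, unsigned factor)
{
  if (factor == 0 || perm.size () % factor != 0)
    return std::nullopt;
  std::vector<unsigned> wide;
  for (unsigned i = 0; i < perm.size (); i += factor)
    {
      unsigned first = perm[i];
      /* The group must start on a wide-element boundary and cover
         FACTOR consecutive narrow elements.  */
      if (first % factor != 0)
        return std::nullopt;
      for (unsigned j = 1; j < factor; ++j)
        if (perm[i + j] != first + j)
          return std::nullopt;
      wide.push_back (first / factor);
    }
  return wide;
}

int
main ()
{
  /* The 32-bit zip1 selector from the tests below, reexpressed in
     64-bit units: { 0, 1, 8, 9, 2, 3, 10, 11 } -> { 0, 4, 1, 5 }.  */
  if (auto wide = shrink_perm ({ 0, 1, 8, 9, 2, 3, 10, 11 }, 2))
    for (unsigned idx : *wide)
      std::printf ("%u ", idx);
  std::printf ("\n");
  return 0;
}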
new file mode 100644
@@ -0,0 +1,106 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVInt32_t vint32 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVFloat32_t vfloat32 __attribute__((arm_sve_vector_bits(256)));
+
+#define TESTS(TYPE) \
+ TYPE \
+ TYPE##_zip1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 8, 9, 2, 3, 10, 11); \
+ } \
+ \
+ TYPE \
+ TYPE##_zip2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 4, 5, 12, 13, 6, 7, 14, 15); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 8, 9, 4, 5, 12, 13); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 2, 3, 10, 11, 6, 7, 14, 15); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 4, 5, 8, 9, 12, 13); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 2, 3, 6, 7, 10, 11, 14, 15); \
+ }
+
+/*
+** vint32_zip1_d:
+** zip1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint32_zip2_d:
+** zip2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint32_trn1_d:
+** trn1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint32_trn2_d:
+** trn2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint32_uzp1_d:
+** uzp1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint32_uzp2_d:
+** uzp2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+TESTS (vint32)
+
+/*
+** vfloat32_zip1_d:
+** zip1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat32_zip2_d:
+** zip2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat32_trn1_d:
+** trn1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat32_trn2_d:
+** trn2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat32_uzp1_d:
+** uzp1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat32_uzp2_d:
+** uzp2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+TESTS (vfloat32)
new file mode 100644
@@ -0,0 +1,277 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVUint16_t vuint16 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVFloat16_t vfloat16 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVBfloat16_t vbfloat16 __attribute__((arm_sve_vector_bits(256)));
+
+#define TESTS(TYPE) \
+ TYPE \
+ TYPE##_zip1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 2, 3, 16, 17, 18, 19, \
+ 4, 5, 6, 7, 20, 21, 22, 23); \
+ } \
+ \
+ TYPE \
+ TYPE##_zip2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 8, 9, 10, 11, 24, 25, 26, 27, \
+ 12, 13, 14, 15, 28, 29, 30, 31); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 2, 3, 16, 17, 18, 19, \
+ 8, 9, 10, 11, 24, 25, 26, 27); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 4, 5, 6, 7, 20, 21, 22, 23, \
+ 12, 13, 14, 15, 28, 29, 30, 31); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 2, 3, 8, 9, 10, 11, \
+ 16, 17, 18, 19, 24, 25, 26, 27); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 4, 5, 6, 7, 12, 13, 14, 15, \
+ 20, 21, 22, 23, 28, 29, 30, 31); \
+ } \
+ \
+ TYPE \
+ TYPE##_zip1_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 16, 17, 2, 3, 18, 19, \
+ 4, 5, 20, 21, 6, 7, 22, 23); \
+ } \
+ \
+ TYPE \
+ TYPE##_zip2_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 8, 9, 24, 25, 10, 11, 26, 27, \
+ 12, 13, 28, 29, 14, 15, 30, 31); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn1_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 16, 17, 4, 5, 20, 21, \
+ 8, 9, 24, 25, 12, 13, 28, 29); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn2_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 2, 3, 18, 19, 6, 7, 22, 23, \
+ 10, 11, 26, 27, 14, 15, 30, 31); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp1_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 4, 5, 8, 9, 12, 13, \
+ 16, 17, 20, 21, 24, 25, 28, 29); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp2_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 2, 3, 6, 7, 10, 11, 14, 15, \
+ 18, 19, 22, 23, 26, 27, 30, 31); \
+ }
+
+/*
+** vuint16_zip1_d:
+** zip1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vuint16_zip2_d:
+** zip2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vuint16_trn1_d:
+** trn1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vuint16_trn2_d:
+** trn2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vuint16_uzp1_d:
+** uzp1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vuint16_uzp2_d:
+** uzp2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vuint16_zip1_s:
+** zip1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vuint16_zip2_s:
+** zip2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vuint16_trn1_s:
+** trn1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vuint16_trn2_s:
+** trn2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vuint16_uzp1_s:
+** uzp1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vuint16_uzp2_s:
+** uzp2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+TESTS (vuint16)
+
+/*
+** vfloat16_zip1_d:
+** zip1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat16_zip2_d:
+** zip2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat16_trn1_d:
+** trn1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat16_trn2_d:
+** trn2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat16_uzp1_d:
+** uzp1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat16_uzp2_d:
+** uzp2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vfloat16_zip1_s:
+** zip1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vfloat16_zip2_s:
+** zip2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vfloat16_trn1_s:
+** trn1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vfloat16_trn2_s:
+** trn2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vfloat16_uzp1_s:
+** uzp1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vfloat16_uzp2_s:
+** uzp2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+TESTS (vfloat16)
+
+/*
+** vbfloat16_zip1_d:
+** zip1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vbfloat16_zip2_d:
+** zip2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vbfloat16_trn1_d:
+** trn1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vbfloat16_trn2_d:
+** trn2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vbfloat16_uzp1_d:
+** uzp1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vbfloat16_uzp2_d:
+** uzp2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vbfloat16_zip1_s:
+** zip1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vbfloat16_zip2_s:
+** zip2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vbfloat16_trn1_s:
+** trn1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vbfloat16_trn2_s:
+** trn2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vbfloat16_uzp1_s:
+** uzp1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vbfloat16_uzp2_s:
+** uzp2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+TESTS (vbfloat16)
new file mode 100644
@@ -0,0 +1,91 @@
+/* { dg-options "-O -msve-vector-bits=256" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVInt8_t vint8 __attribute__((arm_sve_vector_bits(256)));
+
+#define TESTS(TYPE) \
+ TYPE \
+ TYPE##_zip1_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 2, 3, 4, 5, 6, 7, \
+ 32, 33, 34, 35, 36, 37, 38, 39, \
+ 8, 9, 10, 11, 12, 13, 14, 15, \
+ 40, 41, 42, 43, 44, 45, 46, 47); \
+ } \
+ \
+ TYPE \
+ TYPE##_zip2_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 16, 17, 18, 19, 48, 49, 50, 51, \
+ 20, 21, 22, 23, 52, 53, 54, 55, \
+ 24, 25, 26, 27, 56, 57, 58, 59, \
+ 28, 29, 30, 31, 60, 61, 62, 63); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn1_h (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 32, 33, 4, 5, 36, 37, \
+ 8, 9, 40, 41, 12, 13, 44, 45, \
+ 16, 17, 48, 49, 20, 21, 52, 53, \
+ 24, 25, 56, 57, 28, 29, 60, 61); \
+ } \
+ \
+ TYPE \
+ TYPE##_trn2_d (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 8, 9, 10, 11, 12, 13, 14, 15, \
+ 40, 41, 42, 43, 44, 45, 46, 47, \
+ 24, 25, 26, 27, 28, 29, 30, 31, \
+ 56, 57, 58, 59, 60, 61, 62, 63); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp1_s (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 0, 1, 2, 3, 8, 9, 10, 11, \
+ 16, 17, 18, 19, 24, 25, 26, 27, \
+ 32, 33, 34, 35, 40, 41, 42, 43, \
+ 48, 49, 50, 51, 56, 57, 58, 59); \
+ } \
+ \
+ TYPE \
+ TYPE##_uzp2_h (TYPE x, TYPE y) \
+ { \
+ return __builtin_shufflevector (x, y, 2, 3, 6, 7, 10, 11, 14, 15, \
+ 18, 19, 22, 23, 26, 27, 30, 31, \
+ 34, 35, 38, 39, 42, 43, 46, 47, \
+ 50, 51, 54, 55, 58, 59, 62, 63); \
+ }
+
+/*
+** vint8_zip1_d:
+** zip1 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint8_zip2_s:
+** zip2 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vint8_trn1_h:
+** trn1 z0\.h, z0\.h, z1\.h
+** ret
+*/
+/*
+** vint8_trn2_d:
+** trn2 z0\.d, z0\.d, z1\.d
+** ret
+*/
+/*
+** vint8_uzp1_s:
+** uzp1 z0\.s, z0\.s, z1\.s
+** ret
+*/
+/*
+** vint8_uzp2_h:
+** uzp2 z0\.h, z0\.h, z1\.h
+** ret
+*/
+TESTS (vint8)
new file mode 100644
@@ -0,0 +1,113 @@
+/* { dg-options "-O -msve-vector-bits=256 -fgimple" } */
+/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */
+
+typedef __SVInt8_t vint8 __attribute__((arm_sve_vector_bits(256)));
+typedef __SVBool_t vbool __attribute__((arm_sve_vector_bits(256)));
+
+/*
+** uzp1_h:
+** uzp1 p0\.h, p0\.h, p1\.h
+** ret
+*/
+vbool __GIMPLE
+uzp1_h (vbool x, vbool y)
+{
+ vbool z;
+
+ z = __VEC_PERM (x, y, _Literal (vint8)
+ { 0, 1, 4, 5, 8, 9, 12, 13,
+ 16, 17, 20, 21, 24, 25, 28, 29,
+ 32, 33, 36, 37, 40, 41, 44, 45,
+ 48, 49, 52, 53, 56, 57, 60, 61 });
+ return z;
+}
+
+/*
+** uzp2_s:
+** uzp2 p0\.s, p0\.s, p1\.s
+** ret
+*/
+vbool __GIMPLE
+uzp2_s (vbool x, vbool y)
+{
+ vbool z;
+
+ z = __VEC_PERM (x, y, _Literal (vint8)
+ { 4, 5, 6, 7, 12, 13, 14, 15,
+ 20, 21, 22, 23, 28, 29, 30, 31,
+ 36, 37, 38, 39, 44, 45, 46, 47,
+ 52, 53, 54, 55, 60, 61, 62, 63 });
+ return z;
+}
+
+/*
+** trn1_d:
+** trn1 p0\.d, p0\.d, p1\.d
+** ret
+*/
+vbool __GIMPLE
+trn1_d (vbool x, vbool y)
+{
+ vbool z;
+
+ z = __VEC_PERM (x, y, _Literal (vint8)
+ { 0, 1, 2, 3, 4, 5, 6, 7,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 48, 49, 50, 51, 52, 53, 54, 55 });
+ return z;
+}
+
+/*
+** trn2_h:
+** trn2 p0\.h, p0\.h, p1\.h
+** ret
+*/
+vbool __GIMPLE
+trn2_h (vbool x, vbool y)
+{
+ vbool z;
+
+ z = __VEC_PERM (x, y, _Literal (vint8)
+ { 2, 3, 34, 35, 6, 7, 38, 39,
+ 10, 11, 42, 43, 14, 15, 46, 47,
+ 18, 19, 50, 51, 22, 23, 54, 55,
+ 26, 27, 58, 59, 30, 31, 62, 63 });
+ return z;
+}
+
+/*
+** zip1_d:
+** zip1 p0\.d, p0\.d, p1\.d
+** ret
+*/
+vbool __GIMPLE
+zip1_d (vbool x, vbool y)
+{
+ vbool z;
+
+ z = __VEC_PERM (x, y, _Literal (vint8)
+ { 0, 1, 2, 3, 4, 5, 6, 7,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 40, 41, 42, 43, 44, 45, 46, 47 });
+ return z;
+}
+
+/*
+** zip2_s:
+** zip2 p0\.s, p0\.s, p1\.s
+** ret
+*/
+vbool __GIMPLE
+zip2_s (vbool x, vbool y)
+{
+ vbool z;
+
+ z = __VEC_PERM (x, y, _Literal (vint8)
+ { 16, 17, 18, 19, 48, 49, 50, 51,
+ 20, 21, 22, 23, 52, 53, 54, 55,
+ 24, 25, 26, 27, 56, 57, 58, 59,
+ 28, 29, 30, 31, 60, 61, 62, 63 });
+ return z;
+}