@@ -23942,12 +23942,91 @@ aarch64_expand_vector_init_fallback (rtx target, rtx vals)
if (n_var != n_elts)
{
rtx copy = copy_rtx (vals);
+ bool is_index_seq = false;
+
+ /* If at least half of the elements of the vector are constants and all
+ these constant elements form a linear sequence of the form { B, B + S,
+ B + 2 * S, B + 3 * S, ... }, we can generate the vector with SVE's
+ INDEX instruction if SVE is available and then set the elements which
+ are not constant separately. More precisely, each constant element I
+   has to be B + I * S where B and S must be valid immediate operands for
+ an SVE INDEX instruction.
+
+   For example, { X, 1, 2, 3 } is a vector satisfying these conditions and
+ we can generate a vector of all constants (i.e., { 0, 1, 2, 3 }) first
+ and then set the first element of the vector to X. */
+
+ if (TARGET_SVE && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+ && n_var <= n_elts / 2)
+ {
+ int const_idx = -1;
+ HOST_WIDE_INT const_val = 0;
+ int base = 16;
+ int step = 16;
+
+ for (int i = 0; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+
+ if (!CONST_INT_P (x))
+ continue;
+
+ if (const_idx == -1)
+ {
+ const_idx = i;
+ const_val = INTVAL (x);
+ }
+ else
+ {
+ if ((INTVAL (x) - const_val) % (i - const_idx) == 0)
+ {
+ HOST_WIDE_INT s
+ = (INTVAL (x) - const_val) / (i - const_idx);
+ if (s >= -16 && s <= 15)
+ {
+ int b = const_val - s * const_idx;
+ if (b >= -16 && b <= 15)
+ {
+ base = b;
+ step = s;
+ }
+ }
+ }
+ break;
+ }
+ }
+
+ if (base != 16
+ && (!CONST_INT_P (v0)
+ || (CONST_INT_P (v0) && INTVAL (v0) == base)))
+ {
+ if (!CONST_INT_P (v0))
+ XVECEXP (copy, 0, 0) = GEN_INT (base);
+
+ is_index_seq = true;
+ for (int i = 1; i < n_elts; ++i)
+ {
+ rtx x = XVECEXP (copy, 0, i);
+
+ if (CONST_INT_P (x))
+ {
+ if (INTVAL (x) != base + i * step)
+ {
+ is_index_seq = false;
+ break;
+ }
+ }
+ else
+ XVECEXP (copy, 0, i) = GEN_INT (base + i * step);
+ }
+ }
+ }
/* Load constant part of vector. We really don't care what goes into the
parts we will overwrite, but we're more likely to be able to load the
constant efficiently if it has fewer, larger, repeating parts
(see aarch64_simd_valid_immediate). */
- for (int i = 0; i < n_elts; i++)
+ for (int i = 0; !is_index_seq && i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
@@ -1,15 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-require-effective-target aarch64_little_endian } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
#include <arm_sve.h>
+/*
+** dupq:
+** index z0\.s, #0, #1
+** ins v0\.s\[0\], w0
+** dup z0\.q, z0\.q\[0\]
+** ret
+*/
svint32_t
dupq (int x)
{
return svdupq_s32 (x, 1, 2, 3);
}
-
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
@@ -1,15 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
/* To avoid needing big-endian header files. */
#pragma GCC aarch64 "arm_sve.h"
+/*
+** dupq:
+** index z0\.s, #3, #-1
+** ins v0\.s\[0\], w0
+** dup z0\.q, z0\.q\[0\]
+** ret
+*/
svint32_t
dupq (int x)
{
return svdupq_s32 (x, 1, 2, 3);
}
-
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[0\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
@@ -1,15 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
/* To avoid needing big-endian header files. */
#pragma GCC aarch64 "arm_sve.h"
+/*
+** dupq:
+** index z0\.s, #0, #1
+** ins v0\.s\[2\], w0
+** dup z0\.q, z0\.q\[0\]
+** ret
+*/
svint32_t
dupq (int x)
{
return svdupq_s32 (0, 1, x, 3);
}
-
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #0, #1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
@@ -1,15 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
/* To avoid needing big-endian header files. */
#pragma GCC aarch64 "arm_sve.h"
+/*
+** dupq:
+** index z0\.s, #3, #-1
+** ins v0\.s\[2\], w0
+** dup z0\.q, z0\.q\[0\]
+** ret
+*/
svint32_t
dupq (int x)
{
return svdupq_s32 (0, 1, x, 3);
}
-
-/* { dg-final { scan-assembler {\tindex\tz[0-9]+\.s, #3, #-1} } } */
-/* { dg-final { scan-assembler {\tins\tv[0-9]+\.s\[2\], w0\n} } } */
-/* { dg-final { scan-assembler {\tdup\tz[0-9]+\.q, z[0-9]+\.q\[0\]\n} } } */
new file mode 100644
@@ -0,0 +1,47 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef short v8hi __attribute__((vector_size(16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef long v2di __attribute__((vector_size(16)));
+
+/*
+** f:
+** index z0\.s, #0, #1
+** ins v0\.s\[1\], w0
+** ret
+*/
+v4si
+f (int x)
+{
+ return (v4si){ 0, x, 2, 3 };
+}
+
+/*
+** f1:
+** index z0\.s, #3, #-4
+** ins v0\.s\[1\], w0
+** ins v0\.s\[2\], w1
+** ret
+*/
+v4si
+f1 (int x, int y)
+{
+ return (v4si){ 3, x, y, -9 };
+}
+
+/*
+** f2:
+** index z0\.h, #4, #2
+** ins v0\.h\[0\], w0
+** ins v0\.h\[3\], w1
+** ins v0\.h\[7\], w2
+** ret
+*/
+v8hi
+f2 (short x, short y, short z)
+{
+ return (v8hi){ x, 6, 8, y, 12, 14, 16, z };
+}
+
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+typedef int v4si __attribute__ ((vector_size (16)));
+
+v4si
+f (int x, int y)
+{
+ return (v4si){ 1, x, y, 3 };
+}
+
+/* { dg-final { scan-assembler-not {index} } } */
We can still use SVE's INDEX instruction to construct vectors even if
not all elements are constants.  For example, { 0, x, 2, 3 } can be
constructed by first using "INDEX #0, #1" to generate { 0, 1, 2, 3 },
and then set the elements which are non-constants separately.

	PR target/113328

gcc/ChangeLog:

	* config/aarch64/aarch64.cc (aarch64_expand_vector_init_fallback):
	Improve part-variable vector generation with SVE's INDEX if
	TARGET_SVE is available.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/sve/acle/general/dupq_1.c: Update test to use
	check-function-bodies.
	* gcc.target/aarch64/sve/acle/general/dupq_2.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/dupq_3.c: Likewise.
	* gcc.target/aarch64/sve/acle/general/dupq_4.c: Likewise.
	* gcc.target/aarch64/sve/vec_init_4.c: New test.
	* gcc.target/aarch64/sve/vec_init_5.c: New test.

Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
---
 gcc/config/aarch64/aarch64.cc                          | 81 +++++++++++++++++-
 .../gcc.target/aarch64/sve/acle/general/dupq_1.c       | 12 ++-
 .../gcc.target/aarch64/sve/acle/general/dupq_2.c       | 12 ++-
 .../gcc.target/aarch64/sve/acle/general/dupq_3.c       | 12 ++-
 .../gcc.target/aarch64/sve/acle/general/dupq_4.c       | 12 ++-
 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c      | 47 +++++++++++
 gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c      | 12 +++
 7 files changed, 171 insertions(+), 17 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_4.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/vec_init_5.c