diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1104,7 +1104,7 @@ expand_vec_series (rtx dest, rtx base, rtx step, rtx vid)
emit_move_insn (dest, result);
}
-/* Subroutine of riscv_vector_expand_vector_init.
+/* Subroutine of riscv_vector_expand_vector_init and expand_const_vector.
Works as follows:
(a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
(b) Skip leading elements from BUILDER, which are the same as
@@ -1129,7 +1129,7 @@ expand_vector_init_insert_elems (rtx target, const rvv_builder &builder,
}
}
-/* Subroutine of expand_vec_init to handle case
+/* Subroutine of expand_vec_init and expand_const_vector to handle case
when all trailing elements of builder are same.
This works as follows:
(a) Use expand_insn interface to broadcast last vector element in TARGET.
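As a rough illustration of what these two helpers emit (based on the expected
assembly of the new tests added below; register names are illustrative and the
vsetvli setup is omitted): for a V4SI constant like {1, 10, 10, 10}, the
trailing-same-element path broadcasts the repeated 10 and inserts the single
leading element with one slide:

	vmv.v.i       v1,10      # broadcast the trailing element
	li            a5,1       # materialize the leading element
	vslide1up.vx  v2,v1,a5   # shift up and insert it at index 0

For {1, 1, 1, 10} the roles are reversed: the leading run of 1s is broadcast
(again with vmv.v.i) and the trailing 10 is inserted with a single
vslide1down.vx.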
@@ -1248,6 +1248,8 @@ expand_const_vector (rtx target, rtx src)
}
builder.finalize ();
+ bool emit_catch_all_pattern = false;
+
if (CONST_VECTOR_DUPLICATE_P (src))
{
/* Handle the case with repeating sequence that NELTS_PER_PATTERN = 1
@@ -1555,10 +1557,24 @@ expand_const_vector (rtx target, rtx src)
}
else
/* TODO: We will enable more variable-length vector in the future. */
- gcc_unreachable ();
+ emit_catch_all_pattern = true;
}
else
- gcc_unreachable ();
+ emit_catch_all_pattern = true;
+
+ if (emit_catch_all_pattern)
+ {
+ int nelts = XVECLEN (src, 0);
+
+ /* Optimize trailing same elements sequence:
+ v = {y, y2, y3, y4, y5, x, x, x, x, x, x, x, x, x, x, x}; */
+ if (!expand_vector_init_trailing_same_elem (result, builder, nelts))
+ /* Handle the common case with vslide1down. This function can handle
+ any case of vec_init<mode>; only the cases that are not optimized
+ above fall through to here. This avoids storing to and reloading
+ from the stack to initialize the vector. */
+ expand_vector_init_insert_elems (result, builder, nelts);
+ }
if (result != target)
emit_move_insn (target, result);
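To sketch what the catch-all produces for a constant with no useful leading or
trailing run (this mirrors the expected assembly of the arbitrary-constant
tests added below; registers and the vsetvli setup are illustrative):
{100, 12, 324, 1} is built by splatting one element and sliding the remaining
ones in one at a time:

	li              a5,100
	vmv.v.x         v1,a5      # splat the first element
	li              a5,12
	vslide1down.vx  v1,v1,a5
	li              a5,324
	vslide1down.vx  v1,v1,a5
	li              a5,1
	vslide1down.vx  v1,v1,a5   # vector is now {100, 12, 324, 1}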
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -2141,12 +2141,13 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
out range of [-16, 15].
- 3. const series vector.
...etc. */
- if (riscv_v_ext_mode_p (GET_MODE (x)))
+ machine_mode mode = GET_MODE (x);
+ if (riscv_v_ext_mode_p (mode))
{
rtx elt;
if (const_vec_duplicate_p (x, &elt))
{
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL)
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
/* Duplicate values of 0/1 can be emitted using vmv.v.i. */
return 1;
@@ -2154,7 +2155,7 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
system since the ELT constant value can not held
within a single register to disable reload a DI
register vec_duplicate into vmv.v.x. */
- scalar_mode smode = GET_MODE_INNER (GET_MODE (x));
+ scalar_mode smode = GET_MODE_INNER (mode);
if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
&& !immediate_operand (elt, Pmode))
return 0;
@@ -2190,14 +2191,22 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
return 1;
}
- if (CONST_VECTOR_STEPPED_P (x))
+ if (CONST_VECTOR_DUPLICATE_P (x))
+ {
+ /* TODO: Cost cases of CONST_VECTOR_DUPLICATE_P.
+ We get "could not split insn" ICEs from an
+ expand_vector_broadcast emitted during LRA when this op is
+ costed. For now disallow all cases. */
+ return 0;
+ }
+ else if (CONST_VECTOR_STEPPED_P (x))
{
/* Some cases are unhandled so we need construct a builder to
detect/allow those cases to be handled by the fallthrough
handler. */
unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
- rvv_builder builder (GET_MODE(x), npatterns, nelts_per_pattern);
+ rvv_builder builder (mode, npatterns, nelts_per_pattern);
for (unsigned int i = 0; i < nelts_per_pattern; i++)
{
for (unsigned int j = 0; j < npatterns; j++)
@@ -2224,12 +2233,97 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
return 1;
}
- /* Fallthrough. */
+ /* Fall through to the catch-all handling below. */
+ }
+
+ int nelts = const_vector_encoded_nelts (x);
+ if (nelts == 0)
+ return 0;
+
+ /* Most arbitrary vectors can be constructed with a splat and
+ vslide1up/down. */
+
+ /* The arbitrary vector's elements must be supported by the
+ vslide1up/down operation. */
+ scalar_mode smode = GET_MODE_INNER (mode);
+ if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD))
+ /* vslide1up/down does not handle elts larger than a register. */
+ return 0;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ /* vslide1up/down does not handle BImode elts. */
+ return 0;
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
+ && maybe_lt (GET_MODE_SIZE (smode), 4))
+ /* Our vslide1up/down insn def does not handle HF. */
+ return 0;
+
+ /* The fully duplicated vector case was already handled above.
+ Calculate the number of leading/trailing elements covered by
+ the splat. */
+ int leading_ndups = 1;
+ rtx first_elt = CONST_VECTOR_ENCODED_ELT (x, 0);
+ for (int i = 1; i < nelts; i++)
+ {
+ if (first_elt == CONST_VECTOR_ENCODED_ELT (x, i))
+ leading_ndups += 1;
+ else
+ break;
+ }
+ int trailing_ndups = 1;
+ if (leading_ndups >= nelts / 2)
+ /* We already know leading_ndups >= trailing_ndups, so just use
+ the leading dups. */
+ trailing_ndups = 0;
+ else
+ {
+ rtx last_elt = CONST_VECTOR_ENCODED_ELT (x, nelts - 1);
+ for (int i = nelts - 2; i > 0; i--)
+ {
+ if (last_elt == CONST_VECTOR_ENCODED_ELT (x, i))
+ trailing_ndups += 1;
+ else
+ break;
+ }
}
+
+ /* We always splat the leading/trailing elt with the most contiguous
+ duplicates. */
+ int splatted_leading = 0;
+ int splatted_trailing = 0;
+ int splat_materialize = 0;
+ if (leading_ndups > trailing_ndups)
+ {
+ splatted_leading = leading_ndups;
+ splat_materialize = riscv_const_insns (CONST_VECTOR_ELT (x, 0),
+ allow_new_pseudos);
+ }
+ else
+ {
+ splatted_trailing = trailing_ndups;
+ splat_materialize =
+ riscv_const_insns (CONST_VECTOR_ELT (x, nelts - 1),
+ allow_new_pseudos);
+ }
+
+ /* Splat leading/trailing elt. */
+ int total_insns = splat_materialize + 1;
+
+ /* TODO: There's room for improvement here, e.g. duplicate
+ constants won't require another materialization. */
+ for (int i = splatted_leading; i < nelts - splatted_trailing; i++)
+ {
+ rtx elt = CONST_VECTOR_ELT (x, i);
+ int n = riscv_const_insns (elt, allow_new_pseudos);
+ if (n == 0)
+ n = 4; /* memory access. */
+ /* materialize + vslide1{up|down}. */
+ total_insns += n + 1;
+ }
+
+ return total_insns;
}
/* TODO: We may support more const vector in the future. */
- return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0;
+ return x == CONST0_RTX (mode) ? 1 : 0;
}
case CONST:
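As a rough worked example of this costing (assuming each scalar constant below
materializes in a single insn, which is what riscv_const_insns returns for
small integers): for the arbitrary constant {100, 12, 324, 1} used by one of
the new tests below, the leading and trailing runs are both a single element,
so one end is splatted and the other three elements are slid in:

	splat:       1 (materialize) + 1 (vmv.v.x)        = 2
	3 elements:  3 * (1 materialize + 1 vslide1down)  = 6
	total_insns                                       = 8

which lines up with the one vmv.v.x and three vslide1 insns the test expects,
plus the scalar li insns feeding them.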
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-1.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-1.c
@@ -0,0 +1,13 @@
+/* Vector constants can be constructed using splat + vslide1up. */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl128b -mabi=lp64d -O3" } */
+
+void slide1up(int a, int *b) {
+ int c[4] = {1, 10, 10, 10};
+ for (int j = 0; j < 4; j++) {
+ b[j] = c[j];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vmv.v.i} 1 } } */
+/* { dg-final { scan-assembler-times {vslide1up.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-2.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-2.c
@@ -0,0 +1,13 @@
+/* Vector constants can be constructed using splat + vslide1up. */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+
+void slide1up(int a, int *b) {
+ int c[8] = {1, 1, 1, 10, 10, 10, 10, 10};
+ for (int j = 0; j < 8; j++) {
+ b[j] = c[j];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vmv.v.i} 1 } } */
+/* { dg-final { scan-assembler-times {vslide1up.vx} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-3.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-3.c
@@ -0,0 +1,13 @@
+/* Vector constants can be constructed using splat + vslide1down. */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl128b -mabi=lp64d -O3" } */
+
+void slide1down(int a, int *b) {
+ int c[4] = {1, 1, 1, 10};
+ for (int j = 0; j < 4; j++) {
+ b[j] = c[j];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vmv.v.i} 1 } } */
+/* { dg-final { scan-assembler-times {vslide1down.vx} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-4.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-4.c
@@ -0,0 +1,13 @@
+/* Vector constants can be constructed using splat + vslide1down. */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+
+void slide1down(int a, int *b) {
+ int c[8] = {1, 1, 1, 1, 1, 10, 10, 10};
+ for (int j = 0; j < 8; j++) {
+ b[j] = c[j];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vmv.v.i} 1 } } */
+/* { dg-final { scan-assembler-times {vslide1down.vx} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-5.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-5.c
@@ -0,0 +1,13 @@
+/* Arbitrary vector constants can be constructed using splat + vslide1{up|down}. */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl128b -mabi=lp64d -O3" } */
+
+void slide1down_arbitrary(int a, int *b) {
+ int c[4] = {100, 12, 324, 1};
+ for (int j = 0; j < 4; j++) {
+ b[j] = c[j];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vmv.v.x} 1 } } */
+/* { dg-final { scan-assembler-times {vslide1(up|down).vx} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-6.c
new file mode 100644
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-6.c
@@ -0,0 +1,13 @@
+/* Arbitrary vector constants can be constructed using splat + vslide1{up|down}. */
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv_zvl256b -mabi=lp64d -O3" } */
+
+void slide1down_arbitrary(int a, int *b) {
+ int c[8] = {100, 12, 324, 1, 57, 16, 98, 60};
+ for (int j = 0; j < 8; j++) {
+ b[j] = c[j];
+ }
+}
+
+/* { dg-final { scan-assembler-times {vmv.v.x} 1 } } */
+/* { dg-final { scan-assembler-times {vslide1(up|down).vx} 7 } } */
Also explicitly disallow CONST_VECTOR_DUPLICATE_P for now.
CONST_VECTOR_DUPLICATE_P was previously disallowed implicitly.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_vec_series): Update comment.
	(expand_vector_init_insert_elems): Ditto.
	(expand_const_vector): Add catch-all pattern.
	* config/riscv/riscv.cc (riscv_const_insns): Add costing for
	catch-all pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/materialize-1.c: New test.
	* gcc.target/riscv/rvv/autovec/materialize-2.c: New test.
	* gcc.target/riscv/rvv/autovec/materialize-3.c: New test.
	* gcc.target/riscv/rvv/autovec/materialize-4.c: New test.
	* gcc.target/riscv/rvv/autovec/materialize-5.c: New test.
	* gcc.target/riscv/rvv/autovec/materialize-6.c: New test.

Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
---
This causes 4 new regressions on glibc rv64gcv.

These appear to be spilling due to the increased register pressure from
materializing constants for vslide1down:
FAIL: gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c scan-assembler-not jr
FAIL: gcc.dg/vect/costmodel/riscv/rvv/pr113112-4.c scan-assembler-not sp

These are caused by vle32/64 loads being replaced with a splat and
vslide1down sequence:
FAIL: gcc.target/riscv/rvv/autovec/vls/init-5.c -O3 -ftree-vectorize -mrvv-vector-bits=scalable scan-assembler-times vle32\\.v 7
FAIL: gcc.target/riscv/rvv/autovec/vls/init-7.c -O3 -ftree-vectorize -mrvv-vector-bits=scalable scan-assembler-times vle64\\.v 7

I'm not sure it's profitable to replace an lmul8 load with 127
vslide1down.vx ops, but we are being honest with the middle end when
returning the number of insns we will emit during costing.
---
 gcc/config/riscv/riscv-v.cc                   |  24 +++-
 gcc/config/riscv/riscv.cc                     | 108 ++++++++++++++++--
 .../riscv/rvv/autovec/materialize-1.c         |  13 +++
 .../riscv/rvv/autovec/materialize-2.c         |  13 +++
 .../riscv/rvv/autovec/materialize-3.c         |  13 +++
 .../riscv/rvv/autovec/materialize-4.c         |  13 +++
 .../riscv/rvv/autovec/materialize-5.c         |  13 +++
 .../riscv/rvv/autovec/materialize-6.c         |  13 +++
 8 files changed, 199 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/materialize-6.c
--
2.34.1