@@ -1312,25 +1312,61 @@ expand_const_vector (rtx target, rtx src)
/* Generate the variable-length vector following this rule:
{ a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
E.g. { 0, 0, 8, 8, 16, 16, ... } */
- /* We want to create a pattern where value[ix] = floor (ix /
+
+ /* We want a pattern where value[idx] = NPATTERNS * floor (idx /
NPATTERNS). As NPATTERNS is always a power of two we can
- rewrite this as = ix & -NPATTERNS. */
+ rewrite this as = idx & -NPATTERNS. */
/* Step 2: VID AND -NPATTERNS:
{ 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
*/
rtx imm
= gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx tmp = gen_reg_rtx (builder.mode ());
- rtx and_ops[] = {tmp, vid, imm};
+ rtx tmp1 = gen_reg_rtx (builder.mode ());
+ rtx and_ops[] = {tmp1, vid, imm};
icode = code_for_pred_scalar (AND, builder.mode ());
emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+ /* Step 3: Convert to step size 1. */
+ rtx tmp2 = gen_reg_rtx (builder.mode ());
+ /* log2 (npatterns) to get the shift amount to convert
+ E.g. { 0, 0, 0, 0, 4, 4, ... }
+ into { 0, 0, 0, 0, 1, 1, ... }. */
+ HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ()) ;
+ rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
+ rtx shift_ops[] = {tmp2, tmp1, shift};
+ icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
+ emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+ /* Step 4: Multiply to step size n. */
+ HOST_WIDE_INT step_size =
+ INTVAL (builder.elt (builder.npatterns ()))
+ - INTVAL (builder.elt (0));
+ rtx tmp3 = gen_reg_rtx (builder.mode ());
+ if (pow2p_hwi (step_size))
+ {
+ /* Power of 2 can be handled with a left shift. */
+ HOST_WIDE_INT shift = exact_log2 (step_size);
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {tmp3, tmp2, shift_amount};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+ else
+ {
+ rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
+ insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
+ rtx ops[] = {tmp3, tmp2, mult_amt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
+ /* Step 5: Add starting value to all elements. */
HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
if (init_val == 0)
- emit_move_insn (target, tmp);
+ emit_move_insn (target, tmp3);
else
{
rtx dup = gen_const_vector_dup (builder.mode (), init_val);
- rtx add_ops[] = {target, tmp, dup};
+ rtx add_ops[] = {target, tmp3, dup};
icode = code_for_pred (PLUS, builder.mode ());
emit_vlmax_insn (icode, BINARY_OP, add_ops);
}
Prior to this patch the expander would emit vectors like:
  { 0, 0, 5, 5, 10, 10, ... }
as:
  { 0, 0, 2, 2, 4, 4, ... }

This patch sets the step size to the requested value.

gcc/ChangeLog:

	* config/riscv/riscv-v.cc (expand_const_vector): Fix STEP size in
	expander.

Signed-off-by: Patrick O'Neill <patrick@rivosinc.com>
---
Detected with the existing testsuite after patch 8/9 is applied:
FAIL: gcc.dg/torture/vshuf-v16qi.c -O2 execution test
FAIL: gcc.dg/torture/vshuf-v8hi.c -O2 execution test
FAIL: gcc.dg/torture/vshuf-v8qi.c -O2 execution test
---
 gcc/config/riscv/riscv-v.cc | 48 ++++++++++++++++++++++++++++++++-----
 1 file changed, 42 insertions(+), 6 deletions(-)

--
2.34.1