@@ -250,6 +250,7 @@ struct riscv_arg_info {
and each VALUE[i] is a constant integer. CODE[0] is undefined. */
struct riscv_integer_op {
bool use_uw;
+ bool save_temporary;
enum rtx_code code;
unsigned HOST_WIDE_INT value;
};
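
For illustration only (not part of the patch): each riscv_integer_op entry
describes one step of a constant-synthesis sequence, with CODE[0] left as
UNKNOWN to mean "start from this constant".  A minimal sketch of a two-step
LUI+ADDI style table, with made-up values and an aggregate initializer in
field order (use_uw, save_temporary, code, value):

  /* Hypothetical table for 0x12345678: load the high part, then add the
     low 12 bits.  */
  struct riscv_integer_op example[2] = {
    { false, false, UNKNOWN, 0x12345000 },
    { false, false, PLUS,    0x678 }
  };
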
@@ -759,6 +760,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
codes[0].code = UNKNOWN;
codes[0].value = value;
codes[0].use_uw = false;
+ codes[0].save_temporary = false;
return 1;
}
if (TARGET_ZBS && SINGLE_BIT_MASK_OPERAND (value))
@@ -767,6 +769,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
codes[0].code = UNKNOWN;
codes[0].value = value;
codes[0].use_uw = false;
+ codes[0].save_temporary = false;
/* RISC-V sign-extends all 32bit values that live in a 32bit
register. To avoid paradoxes, we thus need to use the
@@ -796,6 +799,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost-1].code = PLUS;
alt_codes[alt_cost-1].value = low_part;
alt_codes[alt_cost-1].use_uw = false;
+ alt_codes[alt_cost-1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -810,6 +814,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost-1].code = XOR;
alt_codes[alt_cost-1].value = low_part;
alt_codes[alt_cost-1].use_uw = false;
+ alt_codes[alt_cost-1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -852,6 +857,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost-1].code = ASHIFT;
alt_codes[alt_cost-1].value = shift;
alt_codes[alt_cost-1].use_uw = use_uw;
+ alt_codes[alt_cost-1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -873,9 +879,11 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
codes[0].value = (((unsigned HOST_WIDE_INT) value >> trailing_ones)
| (value << (64 - trailing_ones)));
codes[0].use_uw = false;
+ codes[0].save_temporary = false;
codes[1].code = ROTATERT;
codes[1].value = 64 - trailing_ones;
codes[1].use_uw = false;
+ codes[1].save_temporary = false;
cost = 2;
}
/* Handle the case where the 11 bit range of zero bits wraps around. */
@@ -888,9 +896,11 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
| ((unsigned HOST_WIDE_INT) value
>> (32 + upper_trailing_ones)));
codes[0].use_uw = false;
+ codes[0].save_temporary = false;
codes[1].code = ROTATERT;
codes[1].value = 32 - upper_trailing_ones;
codes[1].use_uw = false;
+ codes[1].save_temporary = false;
cost = 2;
}
@@ -917,6 +927,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost].code = AND;
alt_codes[alt_cost].value = ~(1UL << bit);
alt_codes[alt_cost].use_uw = false;
+ alt_codes[alt_cost].save_temporary = false;
alt_cost++;
nval &= ~(1UL << bit);
}
@@ -938,6 +949,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost - 1].code = FMA;
alt_codes[alt_cost - 1].value = 9;
alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -948,6 +960,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost - 1].code = FMA;
alt_codes[alt_cost - 1].value = 5;
alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -958,6 +971,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost - 1].code = FMA;
alt_codes[alt_cost - 1].value = 3;
alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -978,6 +992,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[alt_cost - 1].code = PLUS;
alt_codes[alt_cost - 1].value = adjustment;
alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -995,6 +1010,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
alt_codes[i].value = value & 0x7ffff000;
alt_codes[i].use_uw = false;
+ alt_codes[i].save_temporary = false;
value &= ~0x7ffff000;
i++;
}
@@ -1005,6 +1021,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[i].code = (i == 0 ? UNKNOWN : PLUS);
alt_codes[i].value = value & 0x7ff;
alt_codes[i].use_uw = false;
+ alt_codes[i].save_temporary = false;
value &= ~0x7ff;
i++;
}
@@ -1016,6 +1033,7 @@ riscv_build_integer_1 (struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS],
alt_codes[i].code = (i == 0 ? UNKNOWN : IOR);
alt_codes[i].value = 1UL << bit;
alt_codes[i].use_uw = false;
+ alt_codes[i].save_temporary = false;
value &= ~(1ULL << bit);
i++;
}
@@ -1057,6 +1075,7 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
alt_codes[alt_cost-1].code = LSHIFTRT;
alt_codes[alt_cost-1].value = shift;
alt_codes[alt_cost-1].use_uw = false;
+ alt_codes[alt_cost-1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -1069,6 +1088,7 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
alt_codes[alt_cost-1].code = LSHIFTRT;
alt_codes[alt_cost-1].value = shift;
alt_codes[alt_cost-1].use_uw = false;
+ alt_codes[alt_cost-1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -1093,6 +1113,7 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
alt_codes[alt_cost - 1].code = XOR;
alt_codes[alt_cost - 1].value = -1;
alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
memcpy (codes, alt_codes, sizeof (alt_codes));
cost = alt_cost;
}
@@ -1128,13 +1149,55 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
if (cost > 3 && TARGET_64BIT && TARGET_ZBKB)
{
unsigned HOST_WIDE_INT loval = value & 0xffffffff;
- unsigned HOST_WIDE_INT hival = value & ~loval;
- if (hival >> 32 == loval)
+ unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
+ if (hival == loval)
{
cost = 1 + riscv_build_integer_1 (codes, sext_hwi (loval, 32), mode);
codes[cost - 1].code = CONCAT;
codes[cost - 1].value = 0;
codes[cost - 1].use_uw = false;
+ codes[cost - 1].save_temporary = false;
+ }
+
+ /* An arbitrary 64 bit constant can be synthesized in 5 instructions
+ using zbkb. We may do better than that if the upper or lower half
+ can be synthesized with a single LUI, ADDI or BSET. Regardless, the
+ basic steps are the same. */
+ if (cost > 3 && can_create_pseudo_p ())
+ {
+ struct riscv_integer_op hi_codes[RISCV_MAX_INTEGER_OPS];
+ struct riscv_integer_op lo_codes[RISCV_MAX_INTEGER_OPS];
+ int hi_cost, lo_cost;
+
+ /* Synthesize and get cost for each half. */
+ lo_cost
+ = riscv_build_integer_1 (lo_codes, sext_hwi (loval, 32), mode);
+ hi_cost
+ = riscv_build_integer_1 (hi_codes, sext_hwi (hival, 32), mode);
+
+ /* If profitable, finish synthesis using zbkb. */
+ if (cost > hi_cost + lo_cost + 1)
+ {
+ /* We need the low half independent of the high half. So
+ mark it as creating a temporary we'll use later. */
+ memcpy (codes, lo_codes,
+ lo_cost * sizeof (struct riscv_integer_op));
+ codes[lo_cost - 1].save_temporary = true;
+
+ /* Now the high half synthesis. */
+ memcpy (codes + lo_cost, hi_codes,
+ hi_cost * sizeof (struct riscv_integer_op));
+
+ /* Adjust the cost. */
+ cost = hi_cost + lo_cost + 1;
+
+ /* And finally (ab)use VEC_MERGE to indicate we want to
+ merge the two parts together. */
+ codes[cost - 1].code = VEC_MERGE;
+ codes[cost - 1].value = 0;
+ codes[cost - 1].use_uw = false;
+ codes[cost - 1].save_temporary = false;
+ }
}
}
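
As a rough worked example of the zbkb comment above (values invented, not
generated output): a constant whose halves differ is split as in the sketch
below, each half is synthesized independently (here a LUI+ADDI pair each),
and the trailing VEC_MERGE entry later emits a zbkb pack to concatenate the
two 32-bit results, for a total of 5 instructions.

  unsigned HOST_WIDE_INT value = 0xdeadbeef0badf00dULL;
  unsigned HOST_WIDE_INT loval = value & 0xffffffff;      /* 0x0badf00d */
  unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;  /* 0xdeadbeef */
  /* lo_cost == 2 (LUI+ADDI), hi_cost == 2 (LUI+ADDI), plus the final
     pack gives cost == 5.  */
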
@@ -2656,23 +2719,25 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
x = riscv_split_integer (value, mode);
else
{
- codes[0].value = trunc_int_for_mode (codes[0].value, mode);
- /* Apply each binary operation to X. */
- x = GEN_INT (codes[0].value);
-
- for (i = 1; i < num_ops; i++)
+ rtx old_value = NULL_RTX;
+ for (i = 0; i < num_ops; i++)
{
- if (!can_create_pseudo_p ())
+ if (i != 0 && !can_create_pseudo_p ())
x = riscv_emit_set (temp, x);
- else
+ else if (i != 0)
x = force_reg (mode, x);
codes[i].value = trunc_int_for_mode (codes[i].value, mode);
- /* If the sequence requires using a "uw" form of an insn, we're
- going to have to construct the RTL ourselves and put it in
- a register to avoid force_reg/force_operand from mucking things
- up. */
- if (codes[i].use_uw)
+ if (codes[i].code == UNKNOWN)
{
+ /* UNKNOWN means load the constant value into X. */
+ x = GEN_INT (codes[i].value);
+ }
+ else if (codes[i].use_uw)
+ {
+ /* If the sequence requires using a "uw" form of an insn, we're
+ going to have to construct the RTL ourselves and put it in
+ a register to avoid force_reg/force_operand from mucking
+ things up. */
gcc_assert (TARGET_64BIT || TARGET_ZBA);
rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
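
For context (illustrative only, entries abbreviated): the loop is reworked
because the zbkb path above can now place a second UNKNOWN entry in the
middle of the table, where the high-half synthesis restarts X from a fresh
constant.  A table of that shape might look like:

  /* Hypothetical 5-entry table for a 64-bit constant with distinct halves:
       { UNKNOWN, lo_hi }  { PLUS, lo_lo }   <- low half; the last entry
                                                has save_temporary set
       { UNKNOWN, hi_hi }  { PLUS, hi_lo }   <- high half, reloads X
       { VEC_MERGE, 0 }                      <- pack the saved low half
                                                with the new X  */
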
@@ -2695,16 +2760,27 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
x = riscv_emit_set (t, x);
}
- else if (codes[i].code == CONCAT)
+ else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
{
rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
- rtx t2 = gen_lowpart (SImode, x);
+ rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
+ gcc_assert (t2);
+ t2 = gen_lowpart (SImode, t2);
emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
x = t;
}
else
x = gen_rtx_fmt_ee (codes[i].code, mode,
x, GEN_INT (codes[i].value));
+
+ /* If this entry in the code table indicates we should save away
+ the temporary holding the current value of X, then do so. */
+ if (codes[i].save_temporary)
+ {
+ gcc_assert (old_value == NULL_RTX);
+ x = force_reg (mode, x);
+ old_value = x;
+ }
}
}
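
A hedged sketch of the code this can ultimately emit (register names and the
constant are illustrative, not actual compiler output): the low half lands in
a temporary that save_temporary forces into old_value, the high half rebuilds
X, and the VEC_MERGE entry expands through gen_riscv_xpack_di_si_2 into a
zbkb pack of the two low words.

  /* For 0xdeadbeef0badf00d, roughly:
       lui   a5,0xbadf
       addi  a5,a5,13        # a5 = 0x0badf00d  (saved temporary)
       lui   a0,0xdeadc
       addi  a0,a0,-273      # a0 = sext (0xdeadbeef)
       pack  a0,a5,a0        # low 32 bits of a5, low 32 bits of a0 above  */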