@@ -1231,6 +1231,122 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
}
}
+  else if (cost > 4 && TARGET_64BIT && can_create_pseudo_p ()
+           && allow_new_pseudos)
+    {
+      struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+      int alt_cost;
+
+      unsigned HOST_WIDE_INT loval = value & 0xffffffff;
+      unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
+      bool bit31 = (loval & 0x80000000) != 0;
+      int trailing_shift = ctz_hwi (loval) - ctz_hwi (hival);
+      int leading_shift = clz_hwi (loval) - clz_hwi (hival);
+      int shiftval = 0;
+
+      /* Adjust the shift into the high half accordingly.  */
+      if ((trailing_shift > 0 && hival == (loval >> trailing_shift)))
+        shiftval = 32 - trailing_shift;
+      else if ((leading_shift > 0 && hival == (loval << leading_shift)))
+        shiftval = 32 + leading_shift;
+
+      if (shiftval && !bit31)
+        alt_cost = 2 + riscv_build_integer_1 (alt_codes, sext_hwi (loval, 32),
+                                              mode);
+
+      /* For constants where the upper half is a shift of the lower half we
+         can do a shift followed by an or.  Note alt_cost is only set above
+         when shiftval && !bit31, so !bit31 must be tested before reading
+         alt_cost here.  */
+      if (shiftval && !bit31 && alt_cost < cost)
+        {
+          /* We need to save the first constant we build.  */
+          alt_codes[alt_cost - 3].save_temporary = true;
+
+          /* Now we want to shift the previously generated constant into the
+             high half.  */
+          alt_codes[alt_cost - 2].code = ASHIFT;
+          alt_codes[alt_cost - 2].value = shiftval;
+          alt_codes[alt_cost - 2].use_uw = false;
+          alt_codes[alt_cost - 2].save_temporary = false;
+
+          /* And the final step, IOR the two halves together.  Since this uses
+             the saved temporary, use CONCAT similar to what we do for Zbkb.  */
+          alt_codes[alt_cost - 1].code = CONCAT;
+          alt_codes[alt_cost - 1].value = 0;
+          alt_codes[alt_cost - 1].use_uw = false;
+          alt_codes[alt_cost - 1].save_temporary = false;
+
+          memcpy (codes, alt_codes, sizeof (alt_codes));
+          cost = alt_cost;
+        }
+
+      if (cost > 4 && !bit31 && TARGET_ZBA)
+        {
+          int mult = 0;
+
+          /* Check for a shNadd.  */
+          if (hival == loval * 3)
+            mult = 3;
+          else if (hival == loval * 5)
+            mult = 5;
+          else if (hival == loval * 9)
+            mult = 9;
+
+          if (mult)
+            alt_cost = 2 + riscv_build_integer_1 (alt_codes,
+                                                  sext_hwi (loval, 32), mode);
+
+          /* For constants where the upper half is a shNadd of the lower half
+             we can do a similar transformation.  The mult guard ensures
+             alt_cost was initialized above.  */
+          if (mult && alt_cost < cost)
+            {
+              alt_codes[alt_cost - 3].save_temporary = true;
+              alt_codes[alt_cost - 2].code = FMA;
+              alt_codes[alt_cost - 2].value = mult;
+              alt_codes[alt_cost - 2].use_uw = false;
+              alt_codes[alt_cost - 2].save_temporary = false;
+              alt_codes[alt_cost - 1].code = CONCAT;
+              alt_codes[alt_cost - 1].value = 0;
+              alt_codes[alt_cost - 1].use_uw = false;
+              alt_codes[alt_cost - 1].save_temporary = false;
+
+              memcpy (codes, alt_codes, sizeof (alt_codes));
+              cost = alt_cost;
+            }
+        }
+
+      if (cost > 4 && !bit31)
+        {
+          int adjustment = hival - loval;
+
+          /* For constants where the halves differ by less than 2048 we can
+             generate the upper half by using an addi on the lower half then
+             using a shift 32 followed by an or.  */
+          if (abs (adjustment) <= 2047)
+            {
+              alt_cost = 3 + riscv_build_integer_1 (alt_codes,
+                                                    sext_hwi (loval, 32), mode);
+              if (alt_cost < cost)
+                {
+                  alt_codes[alt_cost - 4].save_temporary = true;
+                  alt_codes[alt_cost - 3].code = PLUS;
+                  alt_codes[alt_cost - 3].value = adjustment;
+                  alt_codes[alt_cost - 3].use_uw = false;
+                  alt_codes[alt_cost - 3].save_temporary = false;
+                  alt_codes[alt_cost - 2].code = ASHIFT;
+                  alt_codes[alt_cost - 2].value = 32;
+                  alt_codes[alt_cost - 2].use_uw = false;
+                  alt_codes[alt_cost - 2].save_temporary = false;
+                  alt_codes[alt_cost - 1].code = CONCAT;
+                  alt_codes[alt_cost - 1].value = 0;
+                  alt_codes[alt_cost - 1].use_uw = false;
+                  alt_codes[alt_cost - 1].save_temporary = false;
+
+                  memcpy (codes, alt_codes, sizeof (alt_codes));
+                  cost = alt_cost;
+                }
+            }
+        }
+    }
return cost;
}
@@ -2864,12 +2980,22 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
 	}
       else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
 	{
-	  rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
-	  rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
-	  gcc_assert (t2);
-	  t2 = gen_lowpart (SImode, t2);
-	  emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
-	  x = t;
+	  if (codes[i].code == CONCAT && !TARGET_ZBKB)
+	    {
+	      /* The two values were synthesized so that they have no bits
+		 in common; PLUS is therefore equivalent to IOR and has a
+		 higher chance of using a compressed instruction.  */
+	      x = gen_rtx_PLUS (mode, x, old_value);
+	    }
+	  else
+	    {
+	      rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
+	      rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
+	      gcc_assert (t2);
+	      t2 = gen_lowpart (SImode, t2);
+	      emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
+	      x = t;
+	    }
 	}
       else
 	x = gen_rtx_fmt_ee (codes[i].code, mode,
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* Aggressively skip most optimization levels: we only need to test the
+   basic constant synthesis, which should not vary with optimization level.
+   -O1 works well and avoids the extraneous dead code that would otherwise
+   throw off the instruction counts. */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc" } */
+
+/* Rather than test for a specific synthesis of each constant, or have
+   thousands of tests each checking one variant, we just count the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look. */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 45 } } */
+
+
+unsigned long foo_0x7857f2de7857f2de(void) { return 0x7857f2de7857f2deUL; }
+unsigned long foo_0x7fffdffe3fffefff(void) { return 0x7fffdffe3fffefffUL; }
+unsigned long foo_0x1ffff7fe3fffeffc(void) { return 0x1ffff7fe3fffeffcUL; }
+unsigned long foo_0x0a3fdbf0028ff6fc(void) { return 0x0a3fdbf0028ff6fcUL; }
+unsigned long foo_0x014067e805019fa0(void) { return 0x014067e805019fa0UL; }
+unsigned long foo_0x09d87e90009d87e9(void) { return 0x09d87e90009d87e9UL; }
+unsigned long foo_0x2302320000118119(void) { return 0x2302320000118119UL; }
+unsigned long foo_0x000711eb00e23d60(void) { return 0x000711eb00e23d60UL; }
+unsigned long foo_0x5983800001660e00(void) { return 0x5983800001660e00UL; }
new file mode 100644
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* Aggressively skip most optimization levels: we only need to test the
+   basic constant synthesis, which should not vary with optimization level.
+   -O1 works well and avoids the extraneous dead code that would otherwise
+   throw off the instruction counts. */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc_zba" } */
+
+/* Rather than test for a specific synthesis of each constant, or have
+   thousands of tests each checking one variant, we just count the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look. */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 45 } } */
+
+
+unsigned long foo_0x7907d89a2857f2de(void) { return 0x7907d89a2857f2deUL; }
+unsigned long foo_0x4fffaffb0fffefff(void) { return 0x4fffaffb0fffefffUL; }
+unsigned long foo_0x23ff6fdc03ffeffc(void) { return 0x23ff6fdc03ffeffcUL; }
+unsigned long foo_0x170faedc028ff6fc(void) { return 0x170faedc028ff6fcUL; }
+unsigned long foo_0x5704dee01d019fa0(void) { return 0x5704dee01d019fa0UL; }
+unsigned long foo_0x0589c731009d87e9(void) { return 0x0589c731009d87e9UL; }
+unsigned long foo_0x0057857d00118119(void) { return 0x0057857d00118119UL; }
+unsigned long foo_0x546b32e010e23d60(void) { return 0x546b32e010e23d60UL; }
+unsigned long foo_0x64322a0021660e00(void) { return 0x64322a0021660e00UL; }
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target rv64 } */
+/* Aggressively skip most optimization levels: we only need to test the
+   basic constant synthesis, which should not vary with optimization level.
+   -O1 works well and avoids the extraneous dead code that would otherwise
+   throw off the instruction counts. */
+/* { dg-skip-if "" { *-*-* } { "-O0" "-Og" "-O2" "-O3" "-Os" "-Oz" "-flto" } } */
+/* { dg-options "-march=rv64gc" } */
+
+/* Rather than test for a specific synthesis of each constant, or have
+   thousands of tests each checking one variant, we just count the
+   total number of instructions.
+
+   This isn't expected to change much and any change is worthy of a look. */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|pack|ret|sh1add|sh2add|sh3add|slli|srli|xori|or)" 65 } } */
+
+
+unsigned long foo_0x7857faae7857f2de(void) { return 0x7857faae7857f2deUL; }
+unsigned long foo_0x0ffff7fe0fffefff(void) { return 0x0ffff7fe0fffefffUL; }
+unsigned long foo_0x7857f2de7857faae(void) { return 0x7857f2de7857faaeUL; }
+unsigned long foo_0x7857f2af7857faae(void) { return 0x7857f2af7857faaeUL; }
+unsigned long foo_0x5fbfffff5fbffae5(void) { return 0x5fbfffff5fbffae5UL; }
+unsigned long foo_0x3d3079db3d3079ac(void) { return 0x3d3079db3d3079acUL; }
+unsigned long foo_0x046075fe046078a8(void) { return 0x046075fe046078a8UL; }
+unsigned long foo_0x2411811a24118119(void) { return 0x2411811a24118119UL; }
+unsigned long foo_0x70e23d6a70e23d6b(void) { return 0x70e23d6a70e23d6bUL; }
+unsigned long foo_0x0c01df8c0c01df7d(void) { return 0x0c01df8c0c01df7dUL; }
+unsigned long foo_0x7fff07d07fff0000(void) { return 0x7fff07d07fff0000UL; }
@@ -12,7 +12,7 @@
total number of instructions.
This isn't expected to change much and any change is worthy of a look. */
-/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli)" 7571 } } */
+/* { dg-final { scan-assembler-times "\\t(add|addi|bseti|li|ret|sh1add|sh2add|sh3add|slli|bclri)" 7575 } } */
unsigned long foo_0xfffff7fe7ffff7ff(void) { return 0xfffff7fe7ffff7ffUL; }
unsigned long foo_0xffffeffe7ffff7ff(void) { return 0xffffeffe7ffff7ffUL; }