commit 1390bf52c17a71834a1766c0222e4f8a74efb162
Author: Vladimir N. Makarov <vmakarov@redhat.com>
Date: Fri Dec 1 11:46:37 2023 -0500
[PR112445][LRA]: Fix "unable to find a register to spill" error
PR112445 is a very complicated bug arising from the interaction of the
constraint subpass, inheritance, and hard reg live range splitting.
It is hard to debug this PR only from LRA standard logs. Therefore I
added dumping all func insns at the end of complicated sub-passes
(constraint, inheritance, undoing inheritance, hard reg live range
splitting, and rematerialization). As such output can be quite big,
it is switched on only at level 7 of the -fira-verbose value. The reason
for the bug is that live-range splitting of hard reg (dx) was skipped on
the 1st live range splitting subpass. Splitting is done for reload
pseudos around an original insn and its reload insns, but the subpass
did not recognize such an insn pattern because the previous inheritance and
undoing inheritance subpasses slightly extended the reload pseudo live range.
Although we undid the inheritance in question, the resulting code was slightly
different from the code before the corresponding inheritance pass. The
following fixes the bug by restoring the exact code that existed before the
inheritance.
gcc/ChangeLog:
PR target/112445
* lra.h (lra): Add one more arg.
* lra-int.h (lra_verbose, lra_dump_insns): New externals.
(lra_dump_insns_if_possible): Ditto.
* lra.cc (lra_dump_insns): Dump all insns.
(lra_dump_insns_if_possible): Dump all insns for lra_verbose >= 7.
(lra_verbose): New global.
(lra): Add new arg. Setup lra_verbose from its value.
* lra-assigns.cc (lra_split_hard_reg_for): Dump insns if rtl
was changed.
* lra-remat.cc (lra_remat): Dump insns if rtl was changed.
* lra-constraints.cc (lra_inheritance): Dump insns.
(lra_constraints, lra_undo_inheritance): Dump insns if rtl
was changed.
(remove_inheritance_pseudos): Use restore reg if it is set up.
* ira.cc (do_reload): Pass internal_flag_ira_verbose to lra.
gcc/testsuite/ChangeLog:
PR target/112445
* gcc.target/i386/pr112445.c: New test.
@@ -5970,7 +5970,7 @@ do_reload (void)
ira_destroy ();
- lra (ira_dump_file);
+ lra (ira_dump_file, internal_flag_ira_verbose);
/* ???!!! Move it before lra () when we use ira_reg_equiv in
LRA. */
vec_free (reg_equivs);
@@ -1835,6 +1835,7 @@ lra_split_hard_reg_for (void)
if (spill_p)
{
bitmap_clear (&failed_reload_pseudos);
+ lra_dump_insns_if_possible ("changed func after splitting hard regs");
return true;
}
bitmap_clear (&non_reload_pseudos);
@@ -5537,6 +5537,8 @@ lra_constraints (bool first_p)
lra_assert (df_regs_ever_live_p (hard_regno + j));
}
}
+ if (changed_p)
+ lra_dump_insns_if_possible ("changed func after local");
return changed_p;
}
@@ -7277,7 +7279,7 @@ lra_inheritance (void)
bitmap_release (&invalid_invariant_regs);
bitmap_release (&check_only_regs);
free (usage_insns);
-
+ lra_dump_insns_if_possible ("func after inheritance");
timevar_pop (TV_LRA_INHERITANCE);
}
@@ -7477,13 +7479,16 @@ remove_inheritance_pseudos (bitmap remove_pseudos)
== get_regno (lra_reg_info[prev_sregno].restore_rtx))))
&& ! bitmap_bit_p (remove_pseudos, prev_sregno))
{
+ int restore_regno = get_regno (lra_reg_info[sregno].restore_rtx);
+ if (restore_regno < 0)
+ restore_regno = prev_sregno;
lra_assert (GET_MODE (SET_SRC (prev_set))
- == GET_MODE (regno_reg_rtx[sregno]));
+ == GET_MODE (regno_reg_rtx[restore_regno]));
/* Although we have a single set, the insn can
contain more one sregno register occurrence
as a source. Change all occurrences. */
lra_substitute_pseudo_within_insn (curr_insn, sregno,
- SET_SRC (prev_set),
+ regno_reg_rtx[restore_regno],
false);
/* As we are finishing with processing the insn
here, check the destination too as it might
@@ -7745,5 +7750,7 @@ lra_undo_inheritance (void)
EXECUTE_IF_SET_IN_BITMAP (&lra_split_regs, 0, regno, bi)
lra_reg_info[regno].restore_rtx = NULL_RTX;
change_p = undo_optional_reloads () || change_p;
+ if (change_p)
+ lra_dump_insns_if_possible ("changed func after undoing inheritance");
return change_p;
}
@@ -278,6 +278,7 @@ typedef class lra_insn_recog_data *lra_insn_recog_data_t;
/* lra.cc: */
extern FILE *lra_dump_file;
+extern int lra_verbose;
extern bool lra_hard_reg_split_p;
extern bool lra_asm_error_p;
@@ -312,6 +313,9 @@ extern void lra_emit_move (rtx, rtx);
extern void lra_update_dups (lra_insn_recog_data_t, signed char *);
extern void lra_asm_insn_error (rtx_insn *insn);
+extern void lra_dump_insns (FILE *f);
+extern void lra_dump_insns_if_possible (const char *title);
+
extern void lra_process_new_insns (rtx_insn *, rtx_insn *, rtx_insn *,
const char *);
@@ -1331,6 +1331,8 @@ lra_remat (void)
calculate_global_remat_bb_data ();
dump_candidates_and_remat_bb_data ();
result = do_remat ();
+ if (result)
+ lra_dump_insns_if_possible ("changed func after rematerialization");
all_cands.release ();
bitmap_clear (&temp_bitmap);
bitmap_clear (&subreg_regs);
@@ -1879,6 +1879,24 @@ setup_sp_offset (rtx_insn *from, rtx_insn *last)
return offset;
}
+/* Dump all func insns, starting from get_insns (), to file F in a slim form. */
+void
+lra_dump_insns (FILE *f)
+{
+ dump_rtl_slim (f, get_insns (), NULL, -1, 0);
+}
+
+/* Print TITLE and then dump all func insns in a slim form to lra_dump_file.
+ Do nothing if the dump file is not open or lra_verbose is less than 7. */
+void
+lra_dump_insns_if_possible (const char *title)
+{
+ if (lra_dump_file == NULL || lra_verbose < 7)
+ return;
+ fprintf (lra_dump_file, "%s:", title);
+ lra_dump_insns (lra_dump_file);
+}
+
/* Emit insns BEFORE before INSN and insns AFTER after INSN. Put the
insns onto the stack. Print about emitting the insns with
TITLE. */
@@ -2297,6 +2315,9 @@ bitmap_head lra_subreg_reload_pseudos;
/* File used for output of LRA debug information. */
FILE *lra_dump_file;
+/* How verbose the debug information should be. */
+int lra_verbose;
+
/* True if we split hard reg after the last constraint sub-pass. */
bool lra_hard_reg_split_p;
@@ -2332,14 +2353,15 @@ setup_reg_spill_flag (void)
bool lra_simple_p;
/* Major LRA entry function. F is a file should be used to dump LRA
- debug info. */
+ debug info with given verbosity. */
void
-lra (FILE *f)
+lra (FILE *f, int verbose)
{
int i;
bool live_p, inserted_p;
lra_dump_file = f;
+ lra_verbose = verbose;
lra_asm_error_p = false;
lra_pmode_pseudo = gen_reg_rtx (Pmode);
@@ -35,7 +35,7 @@ lra_get_allocno_class (int regno)
}
extern rtx lra_eliminate_regs (rtx, machine_mode, rtx);
-extern void lra (FILE *);
+extern void lra (FILE *, int);
extern void lra_init_once (void);
extern void lra_finish_once (void);
new file mode 100644
@@ -0,0 +1,22 @@
+/* { dg-do compile { target int128 } } */
+/* { dg-options "-O -march=cascadelake -fwrapv" } */
+
+typedef _Decimal64 d64;
+int foo0_f128_0, foo0_ret, foo0_s64_0;
+_Complex float foo0_cf128_0;
+
+void
+foo (char u8_0, char s8_0, _Complex unsigned cu8_0, int cs32_0,
+ _Complex _Float16 cf16_0, _Complex int cf32_0, int d32_0,
+ _Decimal64 d64_0)
+{
+ cu8_0 *= (__int128) foo0_s64_0;
+ int cf32_1 = __builtin_ccosf (cu8_0);
+ __int128 u128_r =
+ foo0_f128_0 + (__int128) foo0_cf128_0 + (__int128) __imag__ foo0_cf128_0;
+ int u64_r = u128_r + foo0_s64_0 + d64_0;
+ int u32_r = u64_r + cs32_0 + cf32_0 + __imag__ cf32_0 + cf32_1 + d32_0;
+ short u16_r = u32_r + cf16_0 + __imag__ cf16_0;
+ char u8_r = u16_r + u8_0 + s8_0 + cu8_0 + __imag__ cu8_0;
+ foo0_ret = u8_r;
+}