Message ID: 506DAD2E.1010507@redhat.com
On Thu, Oct 4, 2012 at 5:37 PM, Vladimir Makarov <vmakarov@redhat.com> wrote:
> The only issue now is PR54146 compilation time for IRA+LRA although it
> was improved significantly.  I will continue work on PR54146.  But now
> I am going to focus on proposals from reviews.

Right, there still are opportunities to improve things.

(The real solution may be to stop SRA from creating so many
simultaneously live pseudos in the first place...)

> +  lra_simple_p
> +    = (ira_use_lra_p && max_reg_num () >= (1 << 26) / last_basic_block);

I think you should use n_basic_blocks here instead of last_basic_block,
in case this runs without compacting the cfg first (n_basic_blocks is
the real number of basic blocks in the cfg, last_basic_block is the
highest index, so last_basic_block >= n_basic_blocks).

Thanks for working on this!

Ciao!
Steven
On Thu, Oct 4, 2012 at 7:07 PM, Steven Bosscher <stevenb.gcc@gmail.com> wrote:
> On Thu, Oct 4, 2012 at 5:37 PM, Vladimir Makarov <vmakarov@redhat.com> wrote:
>> The only issue now is PR54146 compilation time for IRA+LRA although it
>> was improved significantly.  I will continue work on PR54146.  But now
>> I am going to focus on proposals from reviews.
>
> Right, there still are opportunities to improve things.
>
> (The real solution may be to stop SRA from creating so many
> simultaneously live pseudos in the first place...)
>
>> +  lra_simple_p
>> +    = (ira_use_lra_p && max_reg_num () >= (1 << 26) / last_basic_block);
>
> I think you should use n_basic_blocks here instead of
> last_basic_block, in case this runs without compacting the cfg first
> (n_basic_blocks is the real number of basic blocks in the cfg,
> last_basic_block is the highest index, so last_basic_block >=
> n_basic_blocks).

I also noticed that switching to IRA_REGION_ONE improves things when we
have a large number of loops (the profile points to some loop code in
IRA).

Note that the magic number above should be a new --param, and once we
have a diagnostic flag that shows whenever we back off like this, it
should notify the user of that fact (and the params we have overflown)
- this just reminded me of that idea from somebody else ;)

> Thanks for working on this!

Indeed ;)  It, btw, also applies to IRA + reload ...

Richard.

> Ciao!
> Steven
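[Editor's note: Richard's --param suggestion could look roughly like the
fragment below, in the params.def style of that era.  The parameter name,
help text, and placement are invented here; nothing like this is part of the
posted patch.]

```c
/* Hypothetical params.def entry for the magic 1 << 26 product of
   pseudos and basic blocks; a diagnostic could then report when the
   compiler backs off to the simplified LRA algorithms.  */
DEFPARAM (PARAM_LRA_SIMPLE_THRESHOLD,
	  "lra-simple-threshold",
	  "If the product of pseudos and basic blocks exceeds this "
	  "value, use simplified and faster algorithms in LRA.",
	  1 << 26, 0, 0)
```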
On Thu, Oct 4, 2012 at 5:37 PM, Vladimir Makarov wrote:
> The following patch solves most of the LRA scalability problems.
>
> It switches on simpler algorithms in LRA.  First, it switches off
> trying to reassign hard registers to spilled pseudos (in such huge
> functions they usually have long live ranges, so the chance of
> assigning them anything is very small, yet trying to reassign them
> hard registers is too expensive), as well as inheritance, live range
> splitting, and memory coalescing optimizations.  It seems that
> rematerialization is too important for performance -- so I don't
> switch it off.  As splitting is also necessary for generation of
> caller-saves code, I switch off caller-saves in IRA and force IRA to
> do non-regional RA.

Hi Vlad,

I've revisited this patch now that parts of the scalability issues
have been resolved.  Something funny happened for our
soon-to-be-legendary PR54146 test case...

lra-branch yesterday (i.e. without the elimination and constraints
speedup patches):
 integrated RA           : 145.26 (18%)
 LRA non-specific        :  46.94 ( 6%)
 LRA virtuals elimination:  51.56 ( 6%)
 LRA reload inheritance  :   0.03 ( 0%)
 LRA create live ranges  :  46.67 ( 6%)
 LRA hard reg assignment :   0.55 ( 0%)

lra-branch today + ira-speedup-1.diff:
 integrated RA           : 111.19 (15%) usr
 LRA non-specific        :  21.16 ( 3%) usr
 LRA virtuals elimination:   0.65 ( 0%) usr
 LRA reload inheritance  :   0.01 ( 0%) usr
 LRA create live ranges  :  56.33 ( 8%) usr
 LRA hard reg assignment :   0.58 ( 0%) usr

lra-branch today + ira-speedup-1.diff + rm-lra_simple_p.diff:
 integrated RA           :  89.43 (11%) usr
 LRA non-specific        :  21.43 ( 3%) usr
 LRA virtuals elimination:   0.61 ( 0%) usr
 LRA reload inheritance  :   6.10 ( 1%) usr
 LRA create live ranges  :  88.64 (11%) usr
 LRA hard reg assignment :  45.17 ( 6%) usr
 LRA coalesce pseudo regs:   2.24 ( 0%) usr

Note how IRA is *faster* without the lra_simple_p patch.  The cost
comes back in "LRA hard reg assignment" and "LRA create live ranges",
where I assume the latter is a consequence of running
lra_create_live_ranges a few more times to work for the hard-reg
assignment phase.

Do you have an idea why IRA might be faster without the lra_simple_p
thing?  Maybe there's a way to get the best of both...

Ciao!
Steven
On 12-10-10 10:53 AM, Steven Bosscher wrote:
> On Thu, Oct 4, 2012 at 5:37 PM, Vladimir Makarov wrote:
>> The following patch solves most of the LRA scalability problems.
>>
>> It switches on simpler algorithms in LRA.  First, it switches off
>> trying to reassign hard registers to spilled pseudos (in such huge
>> functions they usually have long live ranges, so the chance of
>> assigning them anything is very small, yet trying to reassign them
>> hard registers is too expensive), as well as inheritance, live range
>> splitting, and memory coalescing optimizations.  It seems that
>> rematerialization is too important for performance -- so I don't
>> switch it off.  As splitting is also necessary for generation of
>> caller-saves code, I switch off caller-saves in IRA and force IRA to
>> do non-regional RA.
>
> Hi Vlad,
>
> I've revisited this patch now that parts of the scalability issues
> have been resolved.  Something funny happened for our
> soon-to-be-legendary PR54146 test case...
>
> lra-branch yesterday (i.e. without the elimination and constraints
> speedup patches):
>  integrated RA           : 145.26 (18%)
>  LRA non-specific        :  46.94 ( 6%)
>  LRA virtuals elimination:  51.56 ( 6%)
>  LRA reload inheritance  :   0.03 ( 0%)
>  LRA create live ranges  :  46.67 ( 6%)
>  LRA hard reg assignment :   0.55 ( 0%)
>
> lra-branch today + ira-speedup-1.diff:
>  integrated RA           : 111.19 (15%) usr
>  LRA non-specific        :  21.16 ( 3%) usr
>  LRA virtuals elimination:   0.65 ( 0%) usr
>  LRA reload inheritance  :   0.01 ( 0%) usr
>  LRA create live ranges  :  56.33 ( 8%) usr
>  LRA hard reg assignment :   0.58 ( 0%) usr
>
> lra-branch today + ira-speedup-1.diff + rm-lra_simple_p.diff:
>  integrated RA           :  89.43 (11%) usr
>  LRA non-specific        :  21.43 ( 3%) usr
>  LRA virtuals elimination:   0.61 ( 0%) usr
>  LRA reload inheritance  :   6.10 ( 1%) usr
>  LRA create live ranges  :  88.64 (11%) usr
>  LRA hard reg assignment :  45.17 ( 6%) usr
>  LRA coalesce pseudo regs:   2.24 ( 0%) usr
>
> Note how IRA is *faster* without the lra_simple_p patch.  The cost
> comes back in "LRA hard reg assignment" and "LRA create live ranges",
> where I assume the latter is a consequence of running
> lra_create_live_ranges a few more times to work for the hard-reg
> assignment phase.
>
> Do you have an idea why IRA might be faster without the lra_simple_p
> thing?  Maybe there's a way to get the best of both...

I have no idea.  I cannot confirm it on an Intel Core i7 machine.  Here
is my timing.  Removing lra_simple_p gives the worst compilation time
but the best code size.

It is also interesting that your IRA range patch results in different
code generation (I cannot explain that yet either).  I saw the same on
a small test (a black jack playing and betting strategy).  Another
interesting thing is that the IRA times are the same with and without
the simplified allocation for LRA.

--- branch this morning
 integrated RA           : 48.41 (13%) usr  0.25 ( 3%) sys  48.72 (13%) wall  223608 kB (19%) ggc
 LRA non-specific        : 14.47 ( 4%) usr  0.15 ( 2%) sys  14.57 ( 4%) wall   41443 kB ( 4%) ggc
 LRA virtuals elimination:  0.40 ( 0%) usr  0.00 ( 0%) sys   0.41 ( 0%) wall   36037 kB ( 3%) ggc
 LRA reload inheritance  :  0.14 ( 0%) usr  0.00 ( 0%) sys   0.15 ( 0%) wall    1209 kB ( 0%) ggc
 LRA create live ranges  : 17.37 ( 5%) usr  0.21 ( 3%) sys  17.56 ( 5%) wall    5182 kB ( 0%) ggc
 LRA hard reg assignment :  1.77 ( 0%) usr  0.02 ( 0%) sys   1.76 ( 0%) wall       0 kB ( 0%) ggc
 LRA coalesce pseudo regs:  0.01 ( 0%) usr  0.00 ( 0%) sys   0.00 ( 0%) wall       0 kB ( 0%) ggc
 real=377.25 user=367.58 system=8.36 share=99%% maxrss=33540720 ins=280 outs=92544 mfaults=4448012 waits=17
    text    data     bss     dec     hex filename
 6395340      16     607 6395963  61983b s.o

--- branch this morning + ira range patch
 integrated RA           : 36.03 (10%) usr  0.03 ( 0%) sys  36.20 (10%) wall  223608 kB (19%) ggc
 LRA non-specific        : 14.57 ( 4%) usr  0.14 ( 2%) sys  14.89 ( 4%) wall   41453 kB ( 4%) ggc
 LRA virtuals elimination:  0.36 ( 0%) usr  0.01 ( 0%) sys   0.41 ( 0%) wall   36040 kB ( 3%) ggc
 LRA reload inheritance  :  0.11 ( 0%) usr  0.00 ( 0%) sys   0.15 ( 0%) wall    1210 kB ( 0%) ggc
 LRA create live ranges  : 17.36 ( 5%) usr  0.21 ( 3%) sys  17.53 ( 5%) wall    5184 kB ( 0%) ggc
 LRA hard reg assignment :  1.78 ( 1%) usr  0.02 ( 0%) sys   1.79 ( 0%) wall       0 kB ( 0%) ggc
 LRA coalesce pseudo regs:  0.00 ( 0%) usr  0.00 ( 0%) sys   0.01 ( 0%) wall       0 kB ( 0%) ggc
 TOTAL                   : 351.82           7.50            360.52          1149460 kB
 real=362.68 user=353.65 system=7.84 share=99%% maxrss=33540432 ins=224 outs=92544 mfaults=4073281 waits=17
    text    data     bss     dec     hex filename
 6395424      16     607 6396047  61988f s.o

--- branch this morning + ira range patch + removing lra_simple_p
 integrated RA           : 37.87 ( 9%) usr  0.14 ( 2%) sys  38.30 ( 9%) wall  744114 kB (45%) ggc
 LRA non-specific        : 13.52 ( 3%) usr  0.05 ( 1%) sys  13.60 ( 3%) wall   39171 kB ( 2%) ggc
 LRA virtuals elimination:  0.38 ( 0%) usr  0.01 ( 0%) sys   0.40 ( 0%) wall   33096 kB ( 2%) ggc
 LRA reload inheritance  :  3.31 ( 1%) usr  0.00 ( 0%) sys   3.36 ( 1%) wall    5217 kB ( 0%) ggc
 LRA create live ranges  : 39.75 (10%) usr  0.42 ( 5%) sys  40.53 (10%) wall    5694 kB ( 0%) ggc
 LRA hard reg assignment : 31.87 ( 8%) usr  0.03 ( 0%) sys  31.94 ( 8%) wall       0 kB ( 0%) ggc
 LRA coalesce pseudo regs:  1.14 ( 0%) usr  0.00 ( 0%) sys   1.15 ( 0%) wall       0 kB ( 0%) ggc
 real=424.69 user=414.47 system=8.06 share=99%% maxrss=36546048 ins=34992 outs=91528 mfaults=4253004 waits=175
    text    data     bss     dec     hex filename
 6278007      16     607 6278630  5fcde6 s.o
On Wed, Oct 10, 2012 at 10:14 PM, Vladimir Makarov <vmakarov@redhat.com> wrote:
> It is also interesting that your IRA range patch results in
> different code generation (i can not explain it too now). I saw the same
> on a small test (black jack playing and betting strategy).

I haven't looked into this, but I'm guessing this is because functions
whose semantics depend on the number of program points, like
build_conflict_bit_table, will behave differently.  For example, with
my patch there are significantly fewer program points, and as a result
OBJECT_MIN/OBJECT_MAX take on smaller values.  For
build_conflict_bit_table this leads to a different cost metric to
decide whether the conflict table is small enough to build.  Probably
other functions respond similarly to my patch, but I don't know IRA
well enough to say :-)

In any case, bootstrap & test passed (with and without checking
enabled) on x86_64-unknown-linux-gnu and powerpc64-unknown-linux-gnu,
so I intend to commit the patch (with a few comment typos fixed) today
or tomorrow.

Ciao!
Steven
Index: ira.c
===================================================================
--- ira.c	(revision 192048)
+++ ira.c	(working copy)
@@ -4327,8 +4327,26 @@ ira (FILE *f)
   bool loops_p;
   int max_regno_before_ira, ira_max_point_before_emit;
   int rebuild_p;
+  bool saved_flag_caller_saves = flag_caller_saves;
+  enum ira_region saved_flag_ira_region = flag_ira_region;
 
+  ira_conflicts_p = optimize > 0;
   ira_use_lra_p = targetm.lra_p ();
+  /* If there are too many pseudos and/or basic blocks (e.g. 10K
+     pseudos and 10K blocks or 100K pseudos and 1K blocks), we will
+     use simplified and faster algorithms in LRA.  */
+  lra_simple_p
+    = (ira_use_lra_p && max_reg_num () >= (1 << 26) / last_basic_block);
+  if (lra_simple_p)
+    {
+      /* It permits to skip live range splitting in LRA.  */
+      flag_caller_saves = false;
+      /* There is no sense to do regional allocation when we use
+	 simplified LRA.  */
+      flag_ira_region = IRA_REGION_ONE;
+      ira_conflicts_p = false;
+    }
 
 #ifndef IRA_NO_OBSTACK
   gcc_obstack_init (&ira_obstack);
@@ -4349,7 +4367,6 @@ ira (FILE *f)
       ira_dump_file = stderr;
     }
 
-  ira_conflicts_p = optimize > 0;
   setup_prohibited_mode_move_regs ();
 
   df_note_add_problem ();
@@ -4530,6 +4547,13 @@ ira (FILE *f)
   /* See comment for find_moveable_pseudos call.  */
   if (ira_conflicts_p)
     move_unallocated_pseudos ();
+
+  /* Restore original values.  */
+  if (lra_simple_p)
+    {
+      flag_caller_saves = saved_flag_caller_saves;
+      flag_ira_region = saved_flag_ira_region;
+    }
 }
 
 static void
Index: lra-assigns.c
===================================================================
--- lra-assigns.c	(revision 192050)
+++ lra-assigns.c	(working copy)
@@ -1186,46 +1186,50 @@ assign_by_spills (void)
   improve_inheritance (&changed_pseudo_bitmap);
   bitmap_clear (&non_reload_pseudos);
   bitmap_clear (&changed_insns);
-  /* We should not assign to original pseudos of inheritance pseudos
-     or split pseudos if any its inheritance pseudo did not get hard
-     register or any its split pseudo was not split because undo
-     inheritance/split pass will extend live range of such inheritance
-     or split pseudos.  */
-  bitmap_initialize (&do_not_assign_nonreload_pseudos, &reg_obstack);
-  EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, u, bi)
-    if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
-	&& reg_renumber[u] < 0 && bitmap_bit_p (&lra_inheritance_pseudos, u))
-      bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
-  EXECUTE_IF_SET_IN_BITMAP (&lra_split_pseudos, 0, u, bi)
-    if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
-	&& reg_renumber[u] >= 0 && bitmap_bit_p (&lra_split_pseudos, u))
-      bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
-  for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
-    if (((i < lra_constraint_new_regno_start
-	  && ! bitmap_bit_p (&do_not_assign_nonreload_pseudos, i))
-	 || (bitmap_bit_p (&lra_inheritance_pseudos, i)
-	     && lra_reg_info[i].restore_regno >= 0)
-	 || (bitmap_bit_p (&lra_split_pseudos, i)
-	     && lra_reg_info[i].restore_regno >= 0)
-	 || bitmap_bit_p (&lra_optional_reload_pseudos, i))
-	&& reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
-	&& regno_allocno_class_array[i] != NO_REGS)
-      sorted_pseudos[n++] = i;
-  bitmap_clear (&do_not_assign_nonreload_pseudos);
-  if (n != 0 && lra_dump_file != NULL)
-    fprintf (lra_dump_file, "  Reassing non-reload pseudos\n");
-  qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
-  for (i = 0; i < n; i++)
+  if (! lra_simple_p)
     {
-      regno = sorted_pseudos[i];
-      hard_regno = find_hard_regno_for (regno, &cost, -1);
-      if (hard_regno >= 0)
+      /* We should not assign to original pseudos of inheritance
+	 pseudos or split pseudos if any its inheritance pseudo did
+	 not get hard register or any its split pseudo was not split
+	 because undo inheritance/split pass will extend live range of
+	 such inheritance or split pseudos.  */
+      bitmap_initialize (&do_not_assign_nonreload_pseudos, &reg_obstack);
+      EXECUTE_IF_SET_IN_BITMAP (&lra_inheritance_pseudos, 0, u, bi)
+	if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
+	    && reg_renumber[u] < 0
+	    && bitmap_bit_p (&lra_inheritance_pseudos, u))
+	  bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
+      EXECUTE_IF_SET_IN_BITMAP (&lra_split_pseudos, 0, u, bi)
+	if ((restore_regno = lra_reg_info[u].restore_regno) >= 0
+	    && reg_renumber[u] >= 0 && bitmap_bit_p (&lra_split_pseudos, u))
+	  bitmap_set_bit (&do_not_assign_nonreload_pseudos, restore_regno);
+      for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_reg_num (); i++)
+	if (((i < lra_constraint_new_regno_start
+	      && ! bitmap_bit_p (&do_not_assign_nonreload_pseudos, i))
+	     || (bitmap_bit_p (&lra_inheritance_pseudos, i)
+		 && lra_reg_info[i].restore_regno >= 0)
+	     || (bitmap_bit_p (&lra_split_pseudos, i)
+		 && lra_reg_info[i].restore_regno >= 0)
+	     || bitmap_bit_p (&lra_optional_reload_pseudos, i))
+	    && reg_renumber[i] < 0 && lra_reg_info[i].nrefs != 0
+	    && regno_allocno_class_array[i] != NO_REGS)
+	  sorted_pseudos[n++] = i;
+      bitmap_clear (&do_not_assign_nonreload_pseudos);
+      if (n != 0 && lra_dump_file != NULL)
+	fprintf (lra_dump_file, "  Reassing non-reload pseudos\n");
+      qsort (sorted_pseudos, n, sizeof (int), pseudo_compare_func);
+      for (i = 0; i < n; i++)
	{
-	  regno = sorted_pseudos[i];
-	  hard_regno = find_hard_regno_for (regno, &cost, -1);
-	  if (hard_regno >= 0)
+	  regno = sorted_pseudos[i];
+	  hard_regno = find_hard_regno_for (regno, &cost, -1);
+	  if (hard_regno >= 0)
+	    {
+	      assign_hard_regno (hard_regno, regno);
	      /* We change allocation for non-reload pseudo on this
		 iteration -- mark the pseudo for invalidation of used
		 alternatives of insns containing the pseudo.  */
-	  bitmap_set_bit (&changed_pseudo_bitmap, regno);
+	      bitmap_set_bit (&changed_pseudo_bitmap, regno);
+	    }
	}
     }
   free (update_hard_regno_preference_check);
Index: lra.c
===================================================================
--- lra.c	(revision 192050)
+++ lra.c	(working copy)
@@ -2178,8 +2178,14 @@ setup_reg_spill_flag (void)
   lra_reg_spill_p = false;
 }
 
+/* True if the current function is too big to use regular algorithms
+   in LRA.  In other words, we should use simpler and faster algorithms
+   in LRA.  It also means we should not worry about generation code
+   for caller saves.  The value is set up in IRA.  */
+bool lra_simple_p;
+
 /* Major LRA entry function.  F is a file should be used to dump LRA
-   debug info. */
+   debug info.  */
 void
 lra (FILE *f)
 {
@@ -2266,7 +2272,9 @@ lra (FILE *f)
	 RS6000_PIC_OFFSET_TABLE_REGNUM uneliminable if we started to
	 use a constant pool.  */
       lra_eliminate (false);
-      lra_inheritance ();
+      /* Do inheritance only for regular algorithms.  */
+      if (! lra_simple_p)
+	lra_inheritance ();
       /* We need live ranges for lra_assign -- so build them.  */
       lra_create_live_ranges (true);
       live_p = true;
@@ -2275,10 +2283,16 @@ lra (FILE *f)
	 If inheritance pseudos were spilled, the memory-memory moves
	 involving them will be removed by pass undoing inheritance.  */
-      if (! lra_assign () && lra_coalesce ())
-	live_p = false;
-      if (lra_undo_inheritance ())
-	live_p = false;
+      if (lra_simple_p)
+	lra_assign ();
+      else
+	{
+	  /* Do coalescing only for regular algorithms.  */
+	  if (! lra_assign () && lra_coalesce ())
+	    live_p = false;
+	  if (lra_undo_inheritance ())
+	    live_p = false;
+	}
     }
   bitmap_clear (&lra_inheritance_pseudos);
   bitmap_clear (&lra_split_pseudos);
Index: lra.h
===================================================================
--- lra.h	(revision 192048)
+++ lra.h	(working copy)
@@ -20,6 +20,8 @@ You should have received a copy of the G
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+extern bool lra_simple_p;
+
 /* Return the allocno reg class of REGNO.  If it is a reload pseudo,
    the pseudo should finally get hard register of the allocno
    class.  */