Message ID | 1309384152-25027-12-git-send-email-rth@redhat.com |
---|---|
State | New |
Headers | show |
> While it could be possible to rewrite output_set_got such that we can > individually annotate the instructions, it's simpler to simply > admit that all processors currently being manufactured do want > deep branch prediction. At which point all of the complication > simply goes away. Note that most modern CPUs special-case a call to the next instruction, so they will work well with !X86_TUNE_DEEP_BRANCH_PREDICTION code. Honza > --- > gcc/config/i386/i386.c | 105 +++++++---------------------------------------- > gcc/config/i386/i386.h | 3 - > 2 files changed, 16 insertions(+), 92 deletions(-) > > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c > index 014401b..332e65b 100644 > --- a/gcc/config/i386/i386.c > +++ b/gcc/config/i386/i386.c > @@ -55,7 +55,6 @@ along with GCC; see the file COPYING3. If not see > #include "params.h" > #include "cselib.h" > #include "debug.h" > -#include "dwarf2out.h" > #include "sched-int.h" > #include "sbitmap.h" > #include "fibheap.h" > @@ -1847,10 +1846,6 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { > m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6 > | m_CORE2I7 | m_GENERIC, > > - /* X86_TUNE_DEEP_BRANCH_PREDICTION */ > - m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 > - | m_CORE2I7 | m_GENERIC, > - > /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based > on simulation result. But after P4 was made, no performance benefit > was observed with branch hints. It also increases the code size. > @@ -8323,31 +8318,11 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) > > xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); > > - if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) > + if (!flag_pic) > { > xops[2] = gen_rtx_LABEL_REF (Pmode, label ? 
label : gen_label_rtx ()); > > - if (!flag_pic) > - output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); > - else > - { > - output_asm_insn ("call\t%a2", xops); > -#ifdef DWARF2_UNWIND_INFO > - /* The call to next label acts as a push. */ > - if (dwarf2out_do_frame ()) > - { > - rtx insn; > - start_sequence (); > - insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, > - gen_rtx_PLUS (Pmode, > - stack_pointer_rtx, > - GEN_INT (-4)))); > - RTX_FRAME_RELATED_P (insn) = 1; > - dwarf2out_frame_debug (insn, true); > - end_sequence (); > - } > -#endif > - } > + output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); > > #if TARGET_MACHO > /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This > @@ -8358,29 +8333,6 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) > > targetm.asm_out.internal_label (asm_out_file, "L", > CODE_LABEL_NUMBER (XEXP (xops[2], 0))); > - > - if (flag_pic) > - { > - output_asm_insn ("pop%z0\t%0", xops); > -#ifdef DWARF2_UNWIND_INFO > - /* The pop is a pop and clobbers dest, but doesn't restore it > - for unwind info purposes. */ > - if (dwarf2out_do_frame ()) > - { > - rtx insn; > - start_sequence (); > - insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); > - dwarf2out_frame_debug (insn, true); > - insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, > - gen_rtx_PLUS (Pmode, > - stack_pointer_rtx, > - GEN_INT (4)))); > - RTX_FRAME_RELATED_P (insn) = 1; > - dwarf2out_frame_debug (insn, true); > - end_sequence (); > - } > -#endif > - } > } > else > { > @@ -8388,12 +8340,6 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) > get_pc_thunk_name (name, REGNO (dest)); > pic_labels_used |= 1 << REGNO (dest); > > -#ifdef DWARF2_UNWIND_INFO > - /* Ensure all queued register saves are flushed before the > - call. 
*/ > - if (dwarf2out_do_frame ()) > - dwarf2out_flush_queued_reg_saves (); > -#endif > xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); > xops[2] = gen_rtx_MEM (QImode, xops[2]); > output_asm_insn ("call\t%X2", xops); > @@ -8408,13 +8354,8 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) > #endif > } > > - if (TARGET_MACHO) > - return ""; > - > - if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) > + if (!TARGET_MACHO) > output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); > - else > - output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); > > return ""; > } > @@ -10138,7 +10079,11 @@ ix86_expand_prologue (void) > insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); > } > else > - insn = emit_insn (gen_set_got (pic_offset_table_rtx)); > + { > + insn = emit_insn (gen_set_got (pic_offset_table_rtx)); > + RTX_FRAME_RELATED_P (insn) = 1; > + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); > + } > } > > /* In the pic_reg_used case, make sure that the got load isn't deleted > @@ -28979,12 +28924,7 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) > if (MACHOPIC_ATT_STUB) > switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); > else if (MACHOPIC_PURE) > - { > - if (TARGET_DEEP_BRANCH_PREDICTION) > - switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); > - else > - switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); > - } > + switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); > else > switch_to_section (darwin_sections[machopic_symbol_stub_section]); > > @@ -28998,19 +28938,11 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) > else if (MACHOPIC_PURE) > { > /* PIC stub. */ > - if (TARGET_DEEP_BRANCH_PREDICTION) > - { > - /* 25-byte PIC stub using "CALL get_pc_thunk". */ > - rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); > - output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". 
*/ > - fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label); > - } > - else > - { > - /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */ > - fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label); > - fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label); > - } > + /* 25-byte PIC stub using "CALL get_pc_thunk". */ > + rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); > + output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */ > + fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", > + label, lazy_ptr_name, label); > fprintf (file, "\tjmp\t*%%ecx\n"); > } > else > @@ -29039,13 +28971,8 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) > compatibility with existing dylibs. */ > if (MACHOPIC_PURE) > { > - /* PIC stubs. */ > - if (TARGET_DEEP_BRANCH_PREDICTION) > - /* 25-byte PIC stub using "CALL get_pc_thunk". */ > - switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); > - else > - /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */ > - switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); > + /* 25-byte PIC stub using "CALL get_pc_thunk". */ > + switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); > } > else > /* 16-byte -mdynamic-no-pic stub. 
*/ > diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h > index 8badcbb..1452226 100644 > --- a/gcc/config/i386/i386.h > +++ b/gcc/config/i386/i386.h > @@ -249,7 +249,6 @@ enum ix86_tune_indices { > X86_TUNE_PUSH_MEMORY, > X86_TUNE_ZERO_EXTEND_WITH_AND, > X86_TUNE_UNROLL_STRLEN, > - X86_TUNE_DEEP_BRANCH_PREDICTION, > X86_TUNE_BRANCH_PREDICTION_HINTS, > X86_TUNE_DOUBLE_WITH_ADD, > X86_TUNE_USE_SAHF, > @@ -323,8 +322,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; > #define TARGET_ZERO_EXTEND_WITH_AND \ > ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND] > #define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN] > -#define TARGET_DEEP_BRANCH_PREDICTION \ > - ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION] > #define TARGET_BRANCH_PREDICTION_HINTS \ > ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS] > #define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD] > -- > 1.7.5.4
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 014401b..332e65b 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -55,7 +55,6 @@ along with GCC; see the file COPYING3. If not see #include "params.h" #include "cselib.h" #include "debug.h" -#include "dwarf2out.h" #include "sched-int.h" #include "sbitmap.h" #include "fibheap.h" @@ -1847,10 +1846,6 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { m_486 | m_PENT | m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2I7 | m_GENERIC, - /* X86_TUNE_DEEP_BRANCH_PREDICTION */ - m_ATOM | m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 - | m_CORE2I7 | m_GENERIC, - /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based on simulation result. But after P4 was made, no performance benefit was observed with branch hints. It also increases the code size. @@ -8323,31 +8318,11 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); - if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) + if (!flag_pic) { xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); - if (!flag_pic) - output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); - else - { - output_asm_insn ("call\t%a2", xops); -#ifdef DWARF2_UNWIND_INFO - /* The call to next label acts as a push. */ - if (dwarf2out_do_frame ()) - { - rtx insn; - start_sequence (); - insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, - gen_rtx_PLUS (Pmode, - stack_pointer_rtx, - GEN_INT (-4)))); - RTX_FRAME_RELATED_P (insn) = 1; - dwarf2out_frame_debug (insn, true); - end_sequence (); - } -#endif - } + output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); #if TARGET_MACHO /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. 
This @@ -8358,29 +8333,6 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) targetm.asm_out.internal_label (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (xops[2], 0))); - - if (flag_pic) - { - output_asm_insn ("pop%z0\t%0", xops); -#ifdef DWARF2_UNWIND_INFO - /* The pop is a pop and clobbers dest, but doesn't restore it - for unwind info purposes. */ - if (dwarf2out_do_frame ()) - { - rtx insn; - start_sequence (); - insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); - dwarf2out_frame_debug (insn, true); - insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, - gen_rtx_PLUS (Pmode, - stack_pointer_rtx, - GEN_INT (4)))); - RTX_FRAME_RELATED_P (insn) = 1; - dwarf2out_frame_debug (insn, true); - end_sequence (); - } -#endif - } } else { @@ -8388,12 +8340,6 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) get_pc_thunk_name (name, REGNO (dest)); pic_labels_used |= 1 << REGNO (dest); -#ifdef DWARF2_UNWIND_INFO - /* Ensure all queued register saves are flushed before the - call. 
*/ - if (dwarf2out_do_frame ()) - dwarf2out_flush_queued_reg_saves (); -#endif xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); xops[2] = gen_rtx_MEM (QImode, xops[2]); output_asm_insn ("call\t%X2", xops); @@ -8408,13 +8354,8 @@ output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) #endif } - if (TARGET_MACHO) - return ""; - - if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) + if (!TARGET_MACHO) output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); - else - output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); return ""; } @@ -10138,7 +10079,11 @@ ix86_expand_prologue (void) insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); } else - insn = emit_insn (gen_set_got (pic_offset_table_rtx)); + { + insn = emit_insn (gen_set_got (pic_offset_table_rtx)); + RTX_FRAME_RELATED_P (insn) = 1; + add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); + } } /* In the pic_reg_used case, make sure that the got load isn't deleted @@ -28979,12 +28924,7 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) if (MACHOPIC_ATT_STUB) switch_to_section (darwin_sections[machopic_picsymbol_stub3_section]); else if (MACHOPIC_PURE) - { - if (TARGET_DEEP_BRANCH_PREDICTION) - switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); - else - switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); - } + switch_to_section (darwin_sections[machopic_picsymbol_stub2_section]); else switch_to_section (darwin_sections[machopic_symbol_stub_section]); @@ -28998,19 +28938,11 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) else if (MACHOPIC_PURE) { /* PIC stub. */ - if (TARGET_DEEP_BRANCH_PREDICTION) - { - /* 25-byte PIC stub using "CALL get_pc_thunk". */ - rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); - output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". 
*/ - fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", label, lazy_ptr_name, label); - } - else - { - /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %eax". */ - fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%ecx\n", label, label); - fprintf (file, "\tmovl %s-LPC$%d(%%ecx),%%ecx\n", lazy_ptr_name, label); - } + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + rtx tmp = gen_rtx_REG (SImode, 2 /* ECX */); + output_set_got (tmp, NULL_RTX); /* "CALL ___<cpu>.get_pc_thunk.cx". */ + fprintf (file, "LPC$%d:\tmovl\t%s-LPC$%d(%%ecx),%%ecx\n", + label, lazy_ptr_name, label); fprintf (file, "\tjmp\t*%%ecx\n"); } else @@ -29039,13 +28971,8 @@ machopic_output_stub (FILE *file, const char *symb, const char *stub) compatibility with existing dylibs. */ if (MACHOPIC_PURE) { - /* PIC stubs. */ - if (TARGET_DEEP_BRANCH_PREDICTION) - /* 25-byte PIC stub using "CALL get_pc_thunk". */ - switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); - else - /* 26-byte PIC stub using inline picbase: "CALL L42 ! L42: pop %ebx". */ - switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); + /* 25-byte PIC stub using "CALL get_pc_thunk". */ + switch_to_section (darwin_sections[machopic_lazy_symbol_ptr2_section]); } else /* 16-byte -mdynamic-no-pic stub. 
*/ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 8badcbb..1452226 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -249,7 +249,6 @@ enum ix86_tune_indices { X86_TUNE_PUSH_MEMORY, X86_TUNE_ZERO_EXTEND_WITH_AND, X86_TUNE_UNROLL_STRLEN, - X86_TUNE_DEEP_BRANCH_PREDICTION, X86_TUNE_BRANCH_PREDICTION_HINTS, X86_TUNE_DOUBLE_WITH_ADD, X86_TUNE_USE_SAHF, @@ -323,8 +322,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; #define TARGET_ZERO_EXTEND_WITH_AND \ ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND] #define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN] -#define TARGET_DEEP_BRANCH_PREDICTION \ - ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION] #define TARGET_BRANCH_PREDICTION_HINTS \ ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS] #define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]