Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

Message ID	CA+4CFy5nM2Dw7kv0G61N5PKHoAanmAaKm+45oS4pN22TKgSAFg@mail.gmail.com
State	New
Headers	show Return-Path: <gcc-patches-return-349223-incoming=patchwork.ozlabs.org@gcc.gnu.org> DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :mime-version:in-reply-to:references:date:message-id:subject :from:to:cc:content-type; q=dns; s=default; b=LAhcsIWZe0ls4sGihI TcC2PGLF5L5Uhl1PnmFkRMkIfFCGCMlXrGOI7swgnOY6J+3dAFWdUtqeoZXO7eJX kTrveWKpdavfsmEBLCdM7fZU4+M9PElI7g+1MfyDMWVEB9WGCECGOG90Yz8tOZ3p Rtvw20v83sQRzpFtNkDPDmrX4= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org MIME-Version: 1.0 In-Reply-To: <CAMe9rOp1R8XACsL=v-JZkvpPzTOFiZhZPMqQXWkmPgHW5cjC6w@mail.gmail.com> References: <CAMe9rOo-dc7=ax8_pA21wuxnqphLBvf_Voi2n1OHJX7ZEab=ew@mail.gmail.com> <CA+4CFy4fqCRvM2Luw2_p6AEZOmucSV1KemntEO3_XU5TfzA-7A@mail.gmail.com> <CA+4CFy6gdxREYiJa2B70RBe2aUtLY3zQ9ShK9jGEy26Hdn9QOg@mail.gmail.com> <CAMe9rOp1R8XACsL=v-JZkvpPzTOFiZhZPMqQXWkmPgHW5cjC6w@mail.gmail.com> Date: Mon, 16 Sep 2013 10:44:53 -0700 Message-ID: <CA+4CFy5nM2Dw7kv0G61N5PKHoAanmAaKm+45oS4pN22TKgSAFg@mail.gmail.com> Subject: Re: Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion From: Wei Mi <wmi@google.com> To: "H.J. Lu" <hjl.tools@gmail.com> Cc: Alexander Monakov <amonakov@ispras.ru>, Steven Bosscher <stevenb.gcc@gmail.com>, GCC Patches <gcc-patches@gcc.gnu.org>, David Li <davidxl@google.com>, Kirill Yukhin <kirill.yukhin@gmail.com> Content-Type: text/plain; charset=ISO-8859-1

diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c index 14349be..7ea68cc 100644 --- a/gcc/config/i386/i386-c.c +++ b/gcc/config/i386/i386-c.c @@ -141,6 +141,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__corei7"); def_or_undef (parse_in, "__corei7__"); break; + case PROCESSOR_COREI7_AVX: + def_or_undef (parse_in, "__corei7_avx"); + def_or_undef (parse_in, "__corei7_avx__"); + break; case PROCESSOR_HASWELL: def_or_undef (parse_in, "__core_avx2"); def_or_undef (parse_in, "__core_avx2__"); @@ -239,6 +243,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, case PROCESSOR_COREI7: def_or_undef (parse_in, "__tune_corei7__"); break; + case PROCESSOR_COREI7_AVX: + def_or_undef (parse_in, "__tune_corei7_avx__"); + break; case PROCESSOR_HASWELL: def_or_undef (parse_in, "__tune_core_avx2__"); break; diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 536c357..1fd3f60 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -1908,8 +1908,9 @@ const struct processor_costs *ix86_cost = &pentium_cost; #define m_P4_NOCONA (m_PENT4 | m_NOCONA) #define m_CORE2 (1<<PROCESSOR_CORE2) #define m_COREI7 (1<<PROCESSOR_COREI7) +#define m_COREI7_AVX (1<<PROCESSOR_COREI7_AVX) #define m_HASWELL (1<<PROCESSOR_HASWELL) -#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL) +#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_COREI7_AVX | m_HASWELL) #define m_ATOM (1<<PROCESSOR_ATOM) #define m_SLM (1<<PROCESSOR_SLM) @@ -1984,10 +1985,10 @@ static const unsigned int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC; static const unsigned int x86_avx256_split_unaligned_load - = m_COREI7 | m_GENERIC; + = m_COREI7 | m_COREI7_AVX | m_GENERIC; static const unsigned int x86_avx256_split_unaligned_store - = m_COREI7 | m_BDVER | m_GENERIC; + = m_COREI7 | m_COREI7_AVX | m_BDVER | m_GENERIC; /* In case the average insn count for single function invocation is lower 
than this constant, emit fast (but longer) prologue and @@ -2377,6 +2378,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] = {&core_cost, 16, 10, 16, 10, 16}, /* Core i7 */ {&core_cost, 16, 10, 16, 10, 16}, + /* Core i7 avx */ + {&core_cost, 16, 10, 16, 10, 16}, /* Core avx2 */ {&core_cost, 16, 10, 16, 10, 16}, {&generic32_cost, 16, 7, 16, 7, 16}, @@ -2407,6 +2410,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = "nocona", "core2", "corei7", + "corei7-avx", "core-avx2", "atom", "slm", @@ -3091,12 +3095,12 @@ ix86_option_override_internal (bool main_args_p) {"corei7", PROCESSOR_COREI7, CPU_COREI7, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_POPCNT | PTA_FXSR}, - {"corei7-avx", PROCESSOR_COREI7, CPU_COREI7, + {"corei7-avx", PROCESSOR_COREI7_AVX, CPU_COREI7, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT}, - {"core-avx-i", PROCESSOR_COREI7, CPU_COREI7, + {"core-avx-i", PROCESSOR_COREI7_AVX, CPU_COREI7, PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE @@ -24477,6 +24481,7 @@ ix86_issue_rate (void) case PROCESSOR_PENTIUM4: case PROCESSOR_CORE2: case PROCESSOR_COREI7: + case PROCESSOR_COREI7_AVX: case PROCESSOR_HASWELL: case PROCESSOR_ATHLON: case PROCESSOR_K8: @@ -24834,6 +24839,7 @@ ia32_multipass_dfa_lookahead (void) case PROCESSOR_CORE2: case PROCESSOR_COREI7: + case PROCESSOR_COREI7_AVX: case PROCESSOR_HASWELL: case PROCESSOR_ATOM: case PROCESSOR_SLM: @@ -25474,6 +25480,7 @@ ix86_sched_init_global (FILE *dump ATTRIBUTE_UNUSED, { case PROCESSOR_CORE2: case PROCESSOR_COREI7: + case PROCESSOR_COREI7_AVX: case PROCESSOR_HASWELL: /* Do not perform multipass scheduling for pre-reload schedule to save compile time. 
*/ @@ -29324,6 +29331,10 @@ get_builtin_code_for_version (tree decl, tree *predicate_list) arg_str = "corei7"; priority = P_PROC_SSE4_2; break; + case PROCESSOR_COREI7_AVX: + arg_str = "corei7-avx"; + priority = P_PROC_SSE4_2; + break; case PROCESSOR_ATOM: arg_str = "atom"; priority = P_PROC_SSSE3; diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index e820aa6..788cb8a 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -250,6 +250,7 @@ extern const struct processor_costs ix86_size_cost; #define TARGET_NOCONA (ix86_tune == PROCESSOR_NOCONA) #define TARGET_CORE2 (ix86_tune == PROCESSOR_CORE2) #define TARGET_COREI7 (ix86_tune == PROCESSOR_COREI7) +#define TARGET_COREI7_AVX (ix86_tune == PROCESSOR_COREI7_AVX) #define TARGET_HASWELL (ix86_tune == PROCESSOR_HASWELL) #define TARGET_GENERIC32 (ix86_tune == PROCESSOR_GENERIC32) #define TARGET_GENERIC64 (ix86_tune == PROCESSOR_GENERIC64) @@ -559,6 +560,7 @@ enum target_cpu_default TARGET_CPU_DEFAULT_nocona, TARGET_CPU_DEFAULT_core2, TARGET_CPU_DEFAULT_corei7, + TARGET_CPU_DEFAULT_corei7_avx, TARGET_CPU_DEFAULT_haswell, TARGET_CPU_DEFAULT_atom, TARGET_CPU_DEFAULT_slm, @@ -2118,6 +2120,7 @@ enum processor_type PROCESSOR_NOCONA, PROCESSOR_CORE2, PROCESSOR_COREI7, + PROCESSOR_COREI7_AVX, PROCESSOR_HASWELL, PROCESSOR_GENERIC32, PROCESSOR_GENERIC64, diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index e3a34ee..4ae5f70 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -118,9 +118,9 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency", m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC) DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal", - m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM) + m_COREI7 | m_COREI7_AVX | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM) DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal", - m_COREI7 | m_BDVER | m_SLM) + 
m_COREI7 | m_COREI7_AVX | m_BDVER | m_SLM) DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optimal", m_BDVER) /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies Patch2: 2013-09-16 Wei Mi <wmi@google.com> * gcc/config/i386/i386.c (ix86_macro_fusion_p): New Function. (ix86_macro_fusion_pair_p): Ditto. * gcc/config/i386/i386.h: Add new tune features about macro-fusion. * gcc/config/i386/x86-tune.def (DEF_TUNE): Ditto. * gcc/doc/tm.texi: Generated. * gcc/doc/tm.texi.in: Ditto. * gcc/haifa-sched.c (try_group_insn): New function. (group_insns_for_macro_fusion): Ditto. (sched_init): Call group_insns_for_macro_fusion. * gcc/sched-rgn.c (add_branch_dependences): Keep insns in a SCHED_GROUP at the end of BB to remain their location. * gcc/target.def: Add two hooks: macro_fusion_p and macro_fusion_pair_p. diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 1fd3f60..85b7aa0 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -24856,6 +24856,90 @@ ia32_multipass_dfa_lookahead (void) } } +/* Return true if target platform supports macro-fusion. */ + +static bool +ix86_macro_fusion_p () +{ + if (TARGET_FUSE_CMP_AND_BRANCH + && (!TARGET_64BIT || TARGET_FUSE_CMP_AND_BRANCH_64)) + return true; + else + return false; +} + +/* Check whether current microarchitecture support macro fusion + for insn pair "CONDGEN + CONDJMP". Refer to + "Intel Architectures Optimization Reference Manual". 
*/ + +static bool +ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp) +{ + rtx src; + rtx single_set = single_set (condgen); + enum rtx_code ccode; + rtx compare_set = NULL_RTX, test_if, cond; + + if (single_set == NULL_RTX + && !TARGET_FUSE_ALU_AND_BRANCH) + return false; + + if (single_set != NULL_RTX) + compare_set = single_set; + else + { + int i; + rtx pat = PATTERN (condgen); + for (i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET + && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE) + compare_set = XVECEXP (pat, 0, i); + } + if (compare_set == NULL_RTX) + return false; + src = SET_SRC (compare_set); + if (GET_CODE (src) != COMPARE) + return false; + + /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not + supported. */ + if ((MEM_P (XEXP (src, 0)) + && CONST_INT_P (XEXP (src, 1))) + || (MEM_P (XEXP (src, 1)) + && CONST_INT_P (XEXP (src, 0)))) + return false; + + test_if = SET_SRC (pc_set (condjmp)); + cond = XEXP (test_if, 0); + ccode = GET_CODE (cond); + /* Check whether the conditional jump uses the sign or overflow flags. */ + if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS + && (ccode == GE + || ccode == GT + || ccode == LE + || ccode == LT)) + return false; + + if (get_attr_type (condgen) == TYPE_TEST + || get_attr_type (condgen) == TYPE_ICMP) + return true; + + /* The following handles macro-fusion for alu + jmp. */ + if (!TARGET_FUSE_ALU_AND_BRANCH) + return false; + + /* Macro-fusion for inc/dec + unsigned conditional jump is not + supported. */ + if (get_attr_type (condgen) == TYPE_INCDEC + && (ccode == GEU + || ccode == GTU + || ccode == LEU + || ccode == LTU)) + return false; + + return true; +} + /* Try to reorder ready list to take advantage of Atom pipelined IMUL execution. 
It is applied if (1) IMUL instruction is on the top of list; @@ -42993,6 +43077,10 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val) #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ ia32_multipass_dfa_lookahead +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 788cb8a..048cc18 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -364,6 +364,12 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_USE_VECTOR_CONVERTS] #define TARGET_FUSE_CMP_AND_BRANCH \ ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH] +#define TARGET_FUSE_CMP_AND_BRANCH_64 \ + ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_64] +#define TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS \ + ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS] +#define TARGET_FUSE_ALU_AND_BRANCH \ + ix86_tune_features[X86_TUNE_FUSE_ALU_AND_BRANCH] #define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU] #define TARGET_VECTORIZE_DOUBLE \ ix86_tune_features[X86_TUNE_VECTORIZE_DOUBLE] diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def index 4ae5f70..a60d0f4 100644 --- a/gcc/config/i386/x86-tune.def +++ b/gcc/config/i386/x86-tune.def @@ -196,7 +196,22 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS, "use_vector_converts", m_AMDFAM10) /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction with a subsequent conditional jump instruction into a single compare-and-branch uop. 
*/ -DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER) +DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", + m_CORE_ALL | m_BDVER) +/* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent + conditional jump instruction for TARGET_64BIT. */ +DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64", + m_COREI7 | m_COREI7_AVX | m_HASWELL) +/* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a + subsequent conditional jump instruction when the conditional jump + checks the sign flag (SF) or overflow flag (OF). */ +DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags", + m_COREI7 | m_COREI7_AVX | m_HASWELL) +/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional + jump instruction when the alu instruction produces the CCFLAG consumed by + the conditional jump instruction. */ +DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch", + m_COREI7_AVX | m_HASWELL) /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag will impact LEA instruction selection. */ DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM) diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi index d15f53c..66b45b9 100644 --- a/gcc/doc/tm.texi +++ b/gcc/doc/tm.texi @@ -6553,6 +6553,17 @@ scheduling one insn causes other insns to become ready in the same cycle. These other insns can then be taken into account properly. @end deftypefn +@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_P (void) +This hook is used to check whether target platform supports macro fusion. +@end deftypefn + +@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_PAIR_P (rtx @var{condgen}, rtx @var{condjmp}) +This hook is used to check whether two insns could be macro fused for +target microarchitecture. If this hook returns true for the given insn pair +(@var{condgen} and @var{condjmp}), scheduler will put them into a sched +group, and they will not be scheduled apart. 
+@end deftypefn + @deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail}) This hook is called after evaluation forward dependencies of insns in chain given by two parameter values (@var{head} and @var{tail} diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in index b51d7b3..361ee87 100644 --- a/gcc/doc/tm.texi.in +++ b/gcc/doc/tm.texi.in @@ -4940,6 +4940,10 @@ them: try the first ones in this list first. @hook TARGET_SCHED_REORDER2 +@hook TARGET_SCHED_MACRO_FUSION_P + +@hook TARGET_SCHED_MACRO_FUSION_PAIR_P + @hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK @hook TARGET_SCHED_INIT diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c index 61eaaef..d6726a9 100644 --- a/gcc/haifa-sched.c +++ b/gcc/haifa-sched.c @@ -6519,6 +6519,44 @@ setup_sched_dump (void) ? stderr : dump_file); } +static void +try_group_insn (rtx insn) +{ + unsigned int condreg1, condreg2; + rtx cc_reg_1; + rtx prev; + + targetm.fixed_condition_code_regs (&condreg1, &condreg2); + cc_reg_1 = gen_rtx_REG (CCmode, condreg1); + prev = prev_nonnote_nondebug_insn (insn); + if (!any_condjump_p (insn) + || !reg_referenced_p (cc_reg_1, PATTERN (insn)) + || !prev + || !modified_in_p (cc_reg_1, prev)) + return; + + /* Different microarchitectures support macro fusions for different + combinations of insn pairs. */ + if (!targetm.sched.macro_fusion_pair_p + || !targetm.sched.macro_fusion_pair_p (prev, insn)) + return; + + SCHED_GROUP_P (insn) = 1; +} + +/* If the last cond jump and the cond register defining insn are consecutive + before scheduling, we want them to be in a schedule group. This is good + for performance on microarchitectures supporting macro-fusion. */ + +static void +group_insns_for_macro_fusion () +{ + basic_block bb; + + FOR_EACH_BB (bb) + try_group_insn (BB_END (bb)); +} + /* Initialize some global state for the scheduler. This function works with the common data shared between all the schedulers. 
It is called from the scheduler specific initialization routine. */ @@ -6645,6 +6683,11 @@ sched_init (void) } curr_state = xmalloc (dfa_state_size); + + /* Group compare and branch insns for macro-fusion. */ + if (targetm.sched.macro_fusion_p + && targetm.sched.macro_fusion_p ()) + group_insns_for_macro_fusion (); } static void haifa_init_only_bb (basic_block, basic_block); diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c index e1a2dce..156359e 100644 --- a/gcc/sched-rgn.c +++ b/gcc/sched-rgn.c @@ -2443,6 +2443,8 @@ add_branch_dependences (rtx head, rtx tail) cc0 setters remain at the end because they can't be moved away from their cc0 user. + Predecessors of SCHED_GROUP_P instructions at the end remain at the end. + COND_EXEC insns cannot be moved past a branch (see e.g. PR17808). Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return @@ -2465,7 +2467,8 @@ add_branch_dependences (rtx head, rtx tail) #endif || (!reload_completed && sets_likely_spilled (PATTERN (insn))))) - || NOTE_P (insn)) + || NOTE_P (insn) + || (last != 0 && SCHED_GROUP_P (last))) { if (!NOTE_P (insn)) { diff --git a/gcc/target.def b/gcc/target.def index 6de513f..dae0378 100644 --- a/gcc/target.def +++ b/gcc/target.def @@ -1041,6 +1041,19 @@ scheduling one insn causes other insns to become ready in the same\n\ cycle. These other insns can then be taken into account properly.", int, (FILE *file, int verbose, rtx *ready, int *n_readyp, int clock), NULL) +DEFHOOK +(macro_fusion_p, + "This hook is used to check whether target platform supports macro fusion.", + bool, (void), NULL) + +DEFHOOK +(macro_fusion_pair_p, + "This hook is used to check whether two insns could be macro fused for\n\ +target microarchitecture. 
If this hook returns true for the given insn pair\n\ +(@var{condgen} and @var{condjmp}), scheduler will put them into a sched\n\ +group, and they will not be scheduled apart.", + bool, (rtx condgen, rtx condjmp), NULL) + /* The following member value is a pointer to a function called after evaluation forward dependencies of insns in chain given

Fwd: [PATCH] Scheduling result adjustment to enable macro-fusion

Commit Message

Comments

Patch