===================================================================
@@ -6455,6 +6455,10 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.
@end deftypefn
+@hook TARGET_SCHED_MACRO_FUSION_P
+
+@hook TARGET_SCHED_MACRO_FUSION_PAIR_P
+
@hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
This hook is called after evaluation forward dependencies of insns in
chain given by two parameter values (@var{head} and @var{tail}
===================================================================
@@ -6551,6 +6551,17 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_P (void)
+This hook is used to check whether target platform supports macro fusion.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_PAIR_P (rtx
@var{condgen}, rtx @var{condjmp})
+This hook is used to check whether two insns could be macro fused for
+target microarchitecture. If this hook returns true for the given insn pair
+(@var{condgen} and @var{condjmp}), scheduler will put them into a sched
+group, and they will not be scheduled apart.
+@end deftypefn
+
@deftypefn {Target Hook} void
TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx
@var{tail})
This hook is called after evaluation forward dependencies of insns in
chain given by two parameter values (@var{head} and @var{tail}
===================================================================
@@ -2004,7 +2004,7 @@ static unsigned int initial_ix86_tune_fe
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
- m_BDVER,
+ m_COREI7 | m_BDVER,
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
@@ -24845,6 +24845,99 @@ ia32_multipass_dfa_lookahead (void)
}
}
+/* Return true if target platform supports macro-fusion. */
+
+static bool
+ix86_macro_fusion_p ()
+{
+ if (TARGET_FUSE_CMP_AND_BRANCH)
+ return true;
+ else
+ return false;
+}
+
+/* Check whether current microarchitecture support macro fusion
+ for insn pair "CONDGEN + CONDJMP". Refer to
+ "Intel Architectures Optimization Reference Manual". */
+
+static bool
+ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
+{
+ rtx src;
+ if (!strcmp (ix86_tune_string, "corei7"))
+ {
+ /* For Nehalem. */
+ rtx single_set = single_set (condgen);
+ /* Nehalem doesn't support macro-fusion for add/sub+jmp. */
+ if (single_set == NULL_RTX)
+ return false;
+
+ src = SET_SRC (single_set);
+ if (GET_CODE (src) != COMPARE)
+ return false;
+
+ /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
+ insn pattern. */
+ if ((MEM_P (XEXP (src, 0))
+ && CONST_INT_P (XEXP (src, 1)))
+ || (MEM_P (XEXP (src, 1))
+ && CONST_INT_P (XEXP (src, 0))))
+ return false;
+
+ /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp. */
+ if (get_attr_type (condgen) != TYPE_TEST
+ && get_attr_type (condgen) != TYPE_ICMP)
+ return false;
+ return true;
+ }
+ else if (!strcmp (ix86_tune_string, "corei7-avx"))
+ {
+ /* For Sandybridge. */
+ enum rtx_code ccode;
+ rtx compare_set = NULL_RTX, test_if, cond;
+ rtx single_set = single_set (condgen);
+ if (single_set != NULL_RTX)
+ compare_set = single_set;
+ else
+ {
+ int i;
+ rtx pat = PATTERN (condgen);
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+ && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE)
+ compare_set = XVECEXP (pat, 0, i);
+ }
+
+ if (compare_set == NULL_RTX)
+ return false;
+ src = SET_SRC (compare_set);
+ if (GET_CODE (src) != COMPARE)
+ return false;
+
+ /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
+ insn pattern. */
+ if ((MEM_P (XEXP (src, 0))
+ && CONST_INT_P (XEXP (src, 1)))
+ || (MEM_P (XEXP (src, 1))
+ && CONST_INT_P (XEXP (src, 0))))
+ return false;
+
+ /* Sandybridge doesn't support macro-fusion for inc/dec +
+ unsigned comparison jmp. */
+ test_if = SET_SRC (pc_set (condjmp));
+ cond = XEXP (test_if, 0);
+ ccode = GET_CODE (cond);
+ if (get_attr_type (condgen) == TYPE_INCDEC
+ && (ccode == GEU
+ || ccode == GTU
+ || ccode == LEU
+ || ccode == LTU))
+ return false;
+ return true;
+ }
+ return false;
+}
+
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
execution. It is applied if
(1) IMUL instruction is on the top of list;
@@ -42834,6 +42927,10 @@ ix86_memmodel_check (unsigned HOST_WIDE_
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
===================================================================
@@ -6511,6 +6511,49 @@ setup_sched_dump (void)
? stderr : dump_file);
}
+static void
+try_group_insn (rtx insn)
+{
+ unsigned int condreg1, condreg2;
+ rtx cc_reg_1;
+ rtx prev;
+
+ targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+ prev = prev_nonnote_nondebug_insn (insn);
+ if (!any_condjump_p (insn)
+ || !reg_referenced_p (cc_reg_1, PATTERN (insn))
+ || !prev
+ || !modified_in_p (cc_reg_1, prev))
+ return;
+
+ /* Different microarchitectures support macro fusions for different
+ combinations of insn pairs. */
+ if (!targetm.sched.macro_fusion_pair_p
+ || !targetm.sched.macro_fusion_pair_p (prev, insn))
+ return;
+
+ SCHED_GROUP_P (insn) = 1;
+}
+
+/* If the last cond jump and the cond register defining insn are consecutive
+ before scheduling, we want them to be in a schedule group. This is good
+ for performance on microarchitectures supporting macro-fusion. */
+
+static void
+group_insns_for_macro_fusion ()
+{
+ rtx insn;
+ basic_block bb;
+
+ FOR_EACH_BB (bb)
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (INSN_P (insn))
+ try_group_insn (insn);
+ }
+}
+
/* Initialize some global state for the scheduler. This function works
with the common data shared between all the schedulers. It is called
from the scheduler specific initialization routine. */
@@ -6637,6 +6680,11 @@ sched_init (void)
}
curr_state = xmalloc (dfa_state_size);
+
+ /* Group compare and branch insns for macro-fusion. */
+ if (targetm.sched.macro_fusion_p
+ && targetm.sched.macro_fusion_p ())
+ group_insns_for_macro_fusion ();
}
static void haifa_init_only_bb (basic_block, basic_block);
===================================================================
@@ -591,6 +591,19 @@ DEFHOOK
"",
int, (FILE *file, int verbose, rtx *ready, int *n_readyp, int clock), NULL)
+DEFHOOK
+(macro_fusion_p,
+ "This hook is used to check whether target platform supports macro fusion.",
+ bool, (void), NULL)
+
+DEFHOOK
+(macro_fusion_pair_p,
+ "This hook is used to check whether two insns could be macro fused for\n\
+target microarchitecture. If this hook returns true for the given insn pair\n\
+(@var{condgen} and @var{condjmp}), scheduler will put them into a sched\n\
+group, and they will not be scheduled apart.",
+ bool, (rtx condgen, rtx condjmp), NULL)
+
/* The following member value is a pointer to a function called
after evaluation forward dependencies of insns in chain given
by two parameter values (head and tail correspondingly). */
===================================================================
@@ -2437,6 +2437,8 @@ add_branch_dependences (rtx head, rtx ta
cc0 setters remain at the end because they can't be moved away from
their cc0 user.
+ Predecessors of SCHED_GROUP_P instructions at the end remain at the end.
+
COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).
Insns setting TARGET_CLASS_LIKELY_SPILLED_P registers (usually return
@@ -2459,7 +2461,8 @@ add_branch_dependences (rtx head, rtx ta
#endif
|| (!reload_completed
&& sets_likely_spilled (PATTERN (insn)))))
- || NOTE_P (insn))
+ || NOTE_P (insn)
+ || (last != 0 && SCHED_GROUP_P (last)))
{
if (!NOTE_P (insn))
{