===================================================================
@@ -5605,6 +5605,56 @@ choose_ready (struct ready_list *ready,
}
}
+/* Relocate the element at index I of scheduled_insns so that it ends up
+   at index J.  The element is taken out first, so J is interpreted
+   against the vector as it looks after the removal.  */
+
+static void
+move_insns (int i, int j)
+{
+  rtx moved = scheduled_insns[i];
+
+  scheduled_insns.ordered_remove (i);
+  scheduled_insns.safe_insert (j, moved);
+}
+
+/* If the last cond jump and the cond register setting insn are consecutive
+   before scheduling, and are scheduled away from each other, this func
+   tries to rearrange insns in scheduled_insns and keep those two insns
+   together.  This is good for performance on microarchitectures supporting
+   macro-fusion.
+
+   Nothing is changed when scheduled_insns holds fewer than two insns,
+   when the last scheduled insn is not a conditional jump reading the
+   first fixed condition code register, when the target reports no fixed
+   condition code registers, when the pair is already adjacent, or when
+   the target's macro_fusion_pair_p hook is missing or rejects the pair.  */
+
+static void
+adjust_for_macro_fusion (void)
+{
+  unsigned int condreg1, condreg2;
+  int length = scheduled_insns.length ();
+  int i;
+  rtx cc_reg_1, insn, last;
+
+  /* We need at least the jump and one insn in front of it; this also
+     keeps us from asking an empty vector for its last element.  */
+  if (length < 2)
+    return;
+
+  last = scheduled_insns.last ();
+  if (!any_condjump_p (last))
+    return;
+
+  /* When the hook returns false CONDREG1 is not filled in, so there is
+     no condition code register to look for.  */
+  if (!targetm.fixed_condition_code_regs (&condreg1, &condreg2))
+    return;
+
+  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+  if (!reg_referenced_p (cc_reg_1, PATTERN (last)))
+    return;
+
+  /* Walk backwards from the insn just before the jump to the closest
+     insn that modifies the condition code register.  */
+  for (i = length - 2; i >= 0; i--)
+    if (modified_in_p (cc_reg_1, scheduled_insns[i]))
+      break;
+
+  /* No setter in this schedule, or it already sits right before the
+     jump.  */
+  if (i < 0 || i == length - 2)
+    return;
+
+  insn = scheduled_insns[i];
+
+  /* Only restore adjacency that existed in the insn stream before
+     scheduling; moving INSN back then merely re-establishes its original
+     order relative to the insns that were scheduled after it.  */
+  if (NEXT_INSN (insn) != last)
+    return;
+
+  if (!targetm.sched.macro_fusion_pair_p
+      || !targetm.sched.macro_fusion_pair_p (insn, last))
+    return;
+
+  move_insns (i, length - 2);
+}
+
/* This function is called when we have successfully scheduled a
block. It uses the schedule stored in the scheduled_insns vector
to rearrange the RTL. PREV_HEAD is used as the anchor to which we
@@ -6421,6 +6471,9 @@ schedule_block (basic_block *target_bb,
if (success)
{
+ /* The macro_fusion_p hook may be a null pointer, so test it before
+    calling.  When the target reports macro-fusion support, try to bring
+    the condition-setting insn back next to the final conditional jump
+    before the schedule is committed.  */
+ if (targetm.sched.macro_fusion_p
+ && targetm.sched.macro_fusion_p ())
+ adjust_for_macro_fusion ();
commit_schedule (prev_head, tail, target_bb);
if (sched_verbose)
fprintf (sched_dump, ";; total time = %d\n", clock_var);
===================================================================
@@ -4940,6 +4940,10 @@ them: try the first ones in this list fi
@hook TARGET_SCHED_REORDER2
+@hook TARGET_SCHED_MACRO_FUSION_P
+
+@hook TARGET_SCHED_MACRO_FUSION_PAIR_P
+
@hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
@hook TARGET_SCHED_INIT
===================================================================
@@ -6553,6 +6553,18 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_P (void)
+This hook is used to check whether target platform supports macro fusion.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_PAIR_P (rtx @var{condgen}, rtx @var{condjmp})
+This hook is used to check whether two insns could be macro fused for
+target microarchitecture. Now it is used in scheduler to adjust scheduling
+result for macro-fusion. If this hook returns true for the given insn pair
+(@var{condgen} and @var{condjmp}), scheduler will reschedule @var{condgen}
+to the position just before condjmp before commit the scheduling result.
+@end deftypefn
+
@deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail})
This hook is called after evaluation forward dependencies of insns in
chain given by two parameter values (@var{head} and @var{tail}
===================================================================
@@ -196,7 +196,8 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS,
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
-DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER)
+DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch",
+ m_COREI7 | m_BDVER)
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM)
===================================================================
@@ -24850,6 +24850,99 @@ ia32_multipass_dfa_lookahead (void)
}
}
+/* Tell the scheduler whether the current tuning enables macro-fusion,
+   i.e. whether TARGET_FUSE_CMP_AND_BRANCH is set.  */
+
+static bool
+ix86_macro_fusion_p ()
+{
+  return (TARGET_FUSE_CMP_AND_BRANCH) != 0;
+}
+
+/* Return true if SRC, a COMPARE rtx, compares a memory operand with an
+   immediate, in either operand order.  */
+
+static bool
+ix86_compare_mem_imm_p (rtx src)
+{
+  rtx op0 = XEXP (src, 0);
+  rtx op1 = XEXP (src, 1);
+
+  return ((MEM_P (op0) && CONST_INT_P (op1))
+          || (MEM_P (op1) && CONST_INT_P (op0)));
+}
+
+/* Check whether current microarchitecture support macro fusion
+   for insn pair "CONDGEN + CONDJMP".  Refer to
+   "Intel Architectures Optimization Reference Manual".
+
+   CONDGEN is the insn setting the condition code and CONDJMP is the
+   conditional jump using it.  The microarchitecture is selected by
+   ix86_tune_string: "corei7" uses the Nehalem rules and "corei7-avx"
+   uses the Sandybridge rules.  Any other tuning, or a null tune string,
+   yields false.  */
+
+static bool
+ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
+{
+  rtx src;
+
+  /* strcmp must never be handed a null pointer.  */
+  if (ix86_tune_string == NULL)
+    return false;
+
+  /* strcmp returns 0 when the strings are equal, so the result has to be
+     compared against 0; using it directly as a truth value selects the
+     branch for every tuning except the named one.  */
+  if (strcmp (ix86_tune_string, "corei7") == 0)
+    {
+      /* For Nehalem.  */
+      rtx cond_set = single_set (condgen);
+
+      /* Nehalem doesn't support macro-fusion for add/sub+jmp.  */
+      if (cond_set == NULL_RTX)
+        return false;
+
+      src = SET_SRC (cond_set);
+      if (GET_CODE (src) != COMPARE)
+        return false;
+
+      /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
+         insn pattern.  */
+      if (ix86_compare_mem_imm_p (src))
+        return false;
+
+      /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp;
+         only test and compare insns qualify.  */
+      return (get_attr_type (condgen) == TYPE_TEST
+              || get_attr_type (condgen) == TYPE_ICMP);
+    }
+  else if (strcmp (ix86_tune_string, "corei7-avx") == 0)
+    {
+      /* For Sandybridge.  */
+      rtx compare_set = single_set (condgen);
+
+      if (compare_set == NULL_RTX)
+        {
+          int i;
+          rtx pat = PATTERN (condgen);
+
+          /* Only a PARALLEL carries a vector of sub-patterns that can
+             be searched for the COMPARE.  */
+          if (GET_CODE (pat) != PARALLEL)
+            return false;
+
+          for (i = 0; i < XVECLEN (pat, 0); i++)
+            if (GET_CODE (XVECEXP (pat, 0, i)) == SET
+                && GET_CODE (SET_SRC (XVECEXP (pat, 0, i))) == COMPARE)
+              compare_set = XVECEXP (pat, 0, i);
+        }
+
+      if (compare_set == NULL_RTX)
+        return false;
+
+      src = SET_SRC (compare_set);
+      if (GET_CODE (src) != COMPARE)
+        return false;
+
+      /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
+         insn pattern.  */
+      if (ix86_compare_mem_imm_p (src))
+        return false;
+
+      /* Sandybridge doesn't support macro-fusion for inc/dec +
+         unsigned comparison jmp.  */
+      if (get_attr_type (condgen) == TYPE_INCDEC)
+        {
+          enum rtx_code ccode;
+          rtx test_if;
+          rtx jump_set = pc_set (condjmp);
+
+          /* Refuse anything that does not look like a conditional jump
+             instead of dereferencing a missing pattern.  */
+          if (jump_set == NULL_RTX)
+            return false;
+
+          test_if = SET_SRC (jump_set);
+          if (GET_CODE (test_if) != IF_THEN_ELSE)
+            return false;
+
+          ccode = GET_CODE (XEXP (test_if, 0));
+          if (ccode == GEU || ccode == GTU || ccode == LEU || ccode == LTU)
+            return false;
+        }
+
+      return true;
+    }
+
+  return false;
+}
+
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
execution. It is applied if
(1) IMUL instruction is on the top of list;
@@ -42982,6 +43075,10 @@ ix86_memmodel_check (unsigned HOST_WIDE_
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
===================================================================
@@ -1041,6 +1041,20 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.",
int, (FILE *file, int verbose, rtx *ready, int *n_readyp, int clock), NULL)
+/* The following member value is a pointer to a function taking no
+   arguments and returning bool.  Its default value is NULL, so users
+   must test the pointer before calling through it.  */
+DEFHOOK
+(macro_fusion_p,
+ "This hook is used to check whether target platform supports macro fusion.",
+ bool, (void), NULL)
+
+/* The following member value is a pointer to a function taking two insns
+   (condgen and condjmp) and returning bool.  Its default value is NULL,
+   so users must test the pointer before calling through it.  */
+DEFHOOK
+(macro_fusion_pair_p,
+ "This hook is used to check whether two insns could be macro fused for\n\
+target microarchitecture. Now it is used in scheduler to adjust scheduling\n\
+result for macro-fusion. If this hook returns true for the given insn pair\n\
+(@var{condgen} and @var{condjmp}), scheduler will reschedule @var{condgen}\n\
+to the position just before condjmp before commit the scheduling result.",
+ bool, (rtx condgen, rtx condjmp), NULL)
+
/* The following member value is a pointer to a function called
after evaluation forward dependencies of insns in chain given
by two parameter values (head and tail correspondingly). */