===================================================================
@@ -24850,6 +24850,99 @@ ia32_multipass_dfa_lookahead (void)
}
}
+/* Return true if the target platform supports macro-fusion, i.e. when
+   TARGET_FUSE_CMP_AND_BRANCH is set for the current tuning.  Takes no
+   arguments, matching the TARGET_SCHED_MACRO_FUSION_P hook.  */
+
+static bool
+ix86_macro_fusion_p (void)
+{
+  return TARGET_FUSE_CMP_AND_BRANCH;
+}
+
+/* Return true if SRC, a COMPARE expression, compares a memory operand
+   against an integer constant, in either operand order.  */
+
+static bool
+ix86_compare_mem_imm_p (rtx src)
+{
+  rtx op0 = XEXP (src, 0);
+  rtx op1 = XEXP (src, 1);
+
+  return ((MEM_P (op0) && CONST_INT_P (op1))
+          || (MEM_P (op1) && CONST_INT_P (op0)));
+}
+
+/* Check whether the current microarchitecture supports macro-fusion
+   for the insn pair "CONDGEN + CONDJMP".  CONDGEN is the insn expected
+   to set the condition codes and CONDJMP the conditional jump that
+   follows it.  Refer to the "Intel Architectures Optimization Reference
+   Manual".  Return false for every tuning other than "corei7" and
+   "corei7-avx".  */
+
+static bool
+ix86_macro_fusion_pair_p (rtx condgen, rtx condjmp)
+{
+  rtx src;
+
+  /* NOTE(review): the microarchitecture is selected by comparing
+     ix86_tune_string; this assumes the string is never NULL when the
+     scheduler calls this hook -- confirm against option handling.  */
+  if (!strcmp (ix86_tune_string, "corei7"))
+    {
+      /* For Nehalem.  */
+      rtx set = single_set (condgen);
+
+      /* CONDGEN must be a single SET whose source is a COMPARE.  */
+      if (set == NULL_RTX)
+        return false;
+
+      src = SET_SRC (set);
+      if (GET_CODE (src) != COMPARE)
+        return false;
+
+      /* Nehalem doesn't support macro-fusion for cmp/test MEM-IMM
+         insn pattern.  */
+      if (ix86_compare_mem_imm_p (src))
+        return false;
+
+      /* Nehalem doesn't support macro-fusion for add/sub/dec/inc + jmp,
+         so only test and integer-compare insns qualify.  */
+      if (get_attr_type (condgen) != TYPE_TEST
+          && get_attr_type (condgen) != TYPE_ICMP)
+        return false;
+      return true;
+    }
+  else if (!strcmp (ix86_tune_string, "corei7-avx"))
+    {
+      /* For Sandybridge.  */
+      enum rtx_code ccode;
+      rtx compare_set, test_if, cond;
+
+      compare_set = single_set (condgen);
+      if (compare_set == NULL_RTX)
+        {
+          int i;
+          rtx pat = PATTERN (condgen);
+
+          /* Only a PARALLEL carries a vector of sub-patterns; do not
+             apply XVECLEN/XVECEXP to any other kind of pattern.  */
+          if (GET_CODE (pat) != PARALLEL)
+            return false;
+
+          /* Pick the SET of a COMPARE out of the PARALLEL; if there is
+             more than one, the last one wins.  */
+          for (i = 0; i < XVECLEN (pat, 0); i++)
+            {
+              rtx elt = XVECEXP (pat, 0, i);
+
+              if (GET_CODE (elt) == SET
+                  && GET_CODE (SET_SRC (elt)) == COMPARE)
+                compare_set = elt;
+            }
+        }
+
+      if (compare_set == NULL_RTX)
+        return false;
+      src = SET_SRC (compare_set);
+      if (GET_CODE (src) != COMPARE)
+        return false;
+
+      /* Sandybridge doesn't support macro-fusion for cmp/test MEM-IMM
+         insn pattern.  */
+      if (ix86_compare_mem_imm_p (src))
+        return false;
+
+      /* Sandybridge doesn't support macro-fusion for inc/dec +
+         unsigned comparison jmp.  */
+      test_if = SET_SRC (pc_set (condjmp));
+      cond = XEXP (test_if, 0);
+      ccode = GET_CODE (cond);
+      if (get_attr_type (condgen) == TYPE_INCDEC
+          && (ccode == GEU
+              || ccode == GTU
+              || ccode == LEU
+              || ccode == LTU))
+        return false;
+      return true;
+    }
+  return false;
+}
+
/* Try to reorder ready list to take advantage of Atom pipelined IMUL
execution. It is applied if
(1) IMUL instruction is on the top of list;
@@ -42982,6 +43075,10 @@ ix86_memmodel_check (unsigned HOST_WIDE_
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
+/* Macro-fusion scheduler hooks; see ix86_macro_fusion_p and
+   ix86_macro_fusion_pair_p.  */
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P ix86_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P ix86_macro_fusion_pair_p
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
===================================================================
@@ -196,7 +196,8 @@ DEF_TUNE (X86_TUNE_USE_VECTOR_CONVERTS,
/* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
with a subsequent conditional jump instruction into a single
compare-and-branch uop. */
-DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch", m_BDVER)
+DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH, "fuse_cmp_and_branch",
+ m_COREI7 | m_BDVER)
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_ATOM | m_SLM)
===================================================================
@@ -487,7 +487,6 @@ static void add_dependence_list (rtx, rt
static void add_dependence_list_and_free (struct deps_desc *, rtx,
rtx *, int, enum reg_note, bool);
static void delete_all_dependences (rtx);
-static void chain_to_prev_insn (rtx);
static void flush_pending_lists (struct deps_desc *, rtx, int, int);
static void sched_analyze_1 (struct deps_desc *, rtx, rtx);
@@ -1660,7 +1659,7 @@ delete_all_dependences (rtx insn)
chains backwards. Then we add the dependencies for the group to
the previous nonnote insn. */
-static void
+void
chain_to_prev_insn (rtx insn)
{
sd_iterator_def sd_it;
@@ -2821,6 +2820,35 @@ sched_analyze_2 (struct deps_desc *deps,
sched_deps_info->finish_rhs ();
}
+/* If INSN is a conditional jump and the insn immediately before it (notes
+   and debug insns skipped) sets the condition-code register the jump
+   reads, put INSN into a schedule group so the two stay consecutive after
+   scheduling.  This is good for performance on microarchitectures
+   supporting macro-fusion.  The target's macro_fusion_pair_p hook has the
+   final say on whether a given pair is grouped; INSN is left untouched in
+   every other case.  */
+
+static void
+group_insns_for_macro_fusion (rtx insn)
+{
+  unsigned int condreg1, condreg2;
+  rtx cc_reg_1;
+  rtx prev;
+
+  /* This runs for every insn sched_analyze_insn looks at, so reject
+     everything that is not a conditional jump before building any rtx.  */
+  if (!any_condjump_p (insn))
+    return;
+
+  /* The hook is documented to return false when the target has no fixed
+     condition-code registers; CONDREG1 must not be read in that case.  */
+  if (!targetm.fixed_condition_code_regs (&condreg1, &condreg2))
+    return;
+
+  cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+  prev = prev_nonnote_nondebug_insn (insn);
+  if (!reg_referenced_p (cc_reg_1, PATTERN (insn))
+      || !prev
+      || !modified_in_p (cc_reg_1, prev))
+    return;
+
+  /* Different microarchitectures support macro fusions for different
+     combinations of insn pairs.  */
+  if (!targetm.sched.macro_fusion_pair_p
+      || !targetm.sched.macro_fusion_pair_p (prev, insn))
+    return;
+
+  SCHED_GROUP_P (insn) = 1;
+}
+
/* Analyze an INSN with pattern X to find all dependencies. */
static void
sched_analyze_insn (struct deps_desc *deps, rtx x, rtx insn)
@@ -2844,6 +2872,10 @@ sched_analyze_insn (struct deps_desc *de
can_start_lhs_rhs_p = (NONJUMP_INSN_P (insn)
&& code == SET);
+ if (targetm.sched.macro_fusion_p
+ && targetm.sched.macro_fusion_p ())
+ group_insns_for_macro_fusion (insn);
+
if (may_trap_p (x))
/* Avoid moving trapping instructions across function calls that might
not always return. */
@@ -3504,7 +3536,7 @@ call_may_noreturn_p (rtx insn)
group, and if all INSN's dependencies should be moved to the first
instruction of that group. */
-static bool
+bool
chain_to_prev_insn_p (rtx insn)
{
rtx prev, x;
===================================================================
@@ -6553,6 +6553,17 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.
@end deftypefn
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_P (void)
+This hook is used to check whether target platform supports macro fusion.
+@end deftypefn
+
+@deftypefn {Target Hook} bool TARGET_SCHED_MACRO_FUSION_PAIR_P (rtx @var{condgen}, rtx @var{condjmp})
+This hook is used to check whether two insns could be macro fused for
+target microarchitecture. If this hook returns true for the given insn pair
+(@var{condgen} and @var{condjmp}), scheduler will put them into a sched
+group, and they will not be scheduled apart.
+@end deftypefn
+
@deftypefn {Target Hook} void TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK (rtx @var{head}, rtx @var{tail})
This hook is called after evaluation forward dependencies of insns in
chain given by two parameter values (@var{head} and @var{tail}
===================================================================
@@ -4940,6 +4940,10 @@ them: try the first ones in this list fi
@hook TARGET_SCHED_REORDER2
+@hook TARGET_SCHED_MACRO_FUSION_P
+
+@hook TARGET_SCHED_MACRO_FUSION_PAIR_P
+
@hook TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
@hook TARGET_SCHED_INIT
===================================================================
@@ -1302,6 +1302,8 @@ extern void finish_deps_global (void);
extern void deps_analyze_insn (struct deps_desc *, rtx);
extern void remove_from_deps (struct deps_desc *, rtx);
extern void init_insn_reg_pressure_info (rtx);
+extern bool chain_to_prev_insn_p (rtx insn);
+extern void chain_to_prev_insn (rtx);
extern dw_t get_dep_weak (ds_t, ds_t);
extern ds_t set_dep_weak (ds_t, ds_t, dw_t);
===================================================================
@@ -2507,7 +2507,7 @@ add_branch_dependences (rtx head, rtx ta
}
if (!targetm.have_conditional_execution ())
- return;
+ goto chain_to_prev_insn;
/* Finally, if the block ends in a jump, and we are doing intra-block
scheduling, make sure that the branch depends on any COND_EXEC insns
@@ -2543,7 +2543,7 @@ add_branch_dependences (rtx head, rtx ta
could remove always-true predicates. */
if (!reload_completed || ! (JUMP_P (tail) || JUMP_TABLE_DATA_P (tail)))
- return;
+ goto chain_to_prev_insn;
insn = tail;
while (insn != head)
@@ -2557,6 +2557,23 @@ add_branch_dependences (rtx head, rtx ta
if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
add_dependence (tail, insn, REG_DEP_ANTI);
}
+
+ chain_to_prev_insn:
+ /* Control dependences also need to be chained to the prev insn
+ for sched group. */
+ insn = tail;
+ while (insn != head)
+ {
+ /* Fixup the dependencies in the sched group. */
+ if (JUMP_P (insn)
+ && chain_to_prev_insn_p (insn)
+ && !sel_sched_p ())
+ chain_to_prev_insn (insn);
+
+ insn = PREV_INSN (insn);
+ }
+
+ return;
}
/* Data structures for the computation of data dependences in a regions. We
===================================================================
@@ -1041,6 +1041,19 @@ scheduling one insn causes other insns t
cycle. These other insns can then be taken into account properly.",
int, (FILE *file, int verbose, rtx *ready, int *n_readyp, int clock), NULL)
+/* Return true if the target platform supports macro fusion.  The hook
+   defaults to NULL, in which case the scheduler does no macro-fusion
+   grouping.  */
+DEFHOOK
+(macro_fusion_p,
+ "This hook is used to check whether target platform supports macro fusion.",
+ bool, (void), NULL)
+
+/* Return true if the insn pair CONDGEN + CONDJMP can be macro fused on
+   the target microarchitecture; the scheduler then marks CONDJMP as part
+   of a sched group.  The hook defaults to NULL.  */
+DEFHOOK
+(macro_fusion_pair_p,
+ "This hook is used to check whether two insns could be macro fused for\n\
+target microarchitecture. If this hook returns true for the given insn pair\n\
+(@var{condgen} and @var{condjmp}), scheduler will put them into a sched\n\
+group, and they will not be scheduled apart.",
+ bool, (rtx condgen, rtx condjmp), NULL)
+
/* The following member value is a pointer to a function called
after evaluation forward dependencies of insns in chain given
by two parameter values (head and tail correspondingly). */