===================================================================
@@ -376,6 +376,7 @@ Objective-C and Objective-C++ Dialects}.
-fsched-spec-insn-heuristic -fsched-rank-heuristic @gol
-fsched-last-insn-heuristic -fsched-dep-count-heuristic @gol
-fschedule-insns -fschedule-insns2 -fsection-anchors @gol
+-fdispatch-sched @gol
-fselective-scheduling -fselective-scheduling2 @gol
-fsel-sched-pipelining -fsel-sched-pipelining-outer-loops @gol
-fsignaling-nans -fsingle-precision-constant -fsplit-ivs-in-unroller @gol
@@ -6464,6 +6465,16 @@ generated code and decrease its size by
increase above the number of available hard registers and as a
consequence register spills in the register allocation.
+@item -fdispatch-sched
+@opindex fdispatch-sched
+Enable dispatch scheduling. Dispatch scheduling is on when both
+@option{-march = bdver1} and instruction scheduling are enabled,
+i.e.@: with @option{-fschedule-insns}. Some machines such as AMD
+Bulldozer perform more efficiently if a group of instructions have
+certain alignment and certain operation mix. Dispatch scheduler
+aligns windows and puts the desired mix of instructions in dispatch
+windows.
+
@item -fsched-spec-load
@opindex fsched-spec-load
Allow speculative motion of some load instructions. This only makes
===================================================================
@@ -532,6 +532,7 @@ static void extend_h_i_d (void);
static void ready_add (struct ready_list *, rtx, bool);
static rtx ready_remove_first (struct ready_list *);
+static rtx ready_remove_first_dispatch (struct ready_list *ready);
static void queue_to_ready (struct ready_list *);
static int early_queue_to_ready (state_t, struct ready_list *);
@@ -2646,7 +2647,10 @@ choose_ready (struct ready_list *ready,
if (lookahead <= 0 || SCHED_GROUP_P (ready_element (ready, 0))
|| DEBUG_INSN_P (ready_element (ready, 0)))
{
- *insn_ptr = ready_remove_first (ready);
+ if (flag_dispatch_sched)
+ *insn_ptr = ready_remove_first_dispatch (ready);
+ else
+ *insn_ptr = ready_remove_first (ready);
return 0;
}
else
@@ -2656,6 +2660,7 @@ choose_ready (struct ready_list *ready,
rtx insn;
int try_data = 1, try_control = 1;
ds_t ts;
+ bool fits_in_dispatch_window_p = false;
insn = ready_element (ready, 0);
if (INSN_CODE (insn) < 0)
@@ -2664,6 +2669,14 @@ choose_ready (struct ready_list *ready,
return 0;
}
+ if (flag_dispatch_sched)
+ for (i = 1; i < ready->n_ready; i++)
+ if (fits_dispatch_window (ready_element (ready, i)))
+ {
+ fits_in_dispatch_window_p = true;
+ break;
+ }
+
if (spec_info
&& spec_info->flags & (PREFER_NON_DATA_SPEC
| PREFER_NON_CONTROL_SPEC))
@@ -2715,9 +2728,14 @@ choose_ready (struct ready_list *ready,
{
insn = ready_element (ready, i);
- ready_try [i]
- = ((!try_data && (TODO_SPEC (insn) & DATA_SPEC))
- || (!try_control && (TODO_SPEC (insn) & CONTROL_SPEC)));
+ if (fits_in_dispatch_window_p)
+ ready_try [i] = fits_dispatch_window (insn)
+ && ((!try_data && (TODO_SPEC (insn) & DATA_SPEC))
+ || (!try_control && (TODO_SPEC (insn) & CONTROL_SPEC)));
+ else
+ ready_try [i] = ((!try_data && (TODO_SPEC (insn) & DATA_SPEC))
+ || (!try_control
+ && (TODO_SPEC (insn) & CONTROL_SPEC)));
}
/* Let the target filter the search space. */
@@ -3144,6 +3162,10 @@ schedule_block (basic_block *target_bb)
last_scheduled_insn);
move_insn (insn, last_scheduled_insn, current_sched_info->next_tail);
+
+ if (flag_dispatch_sched)
+ add_to_dispatch_window (insn);
+
reemit_notes (insn);
last_scheduled_insn = insn;
@@ -3368,8 +3390,11 @@ sched_init (void)
flag_schedule_speculative_load = 0;
#endif
+ if (flag_dispatch_sched)
+ init_dispatch_sched ();
sched_pressure_p = (flag_sched_pressure && ! reload_completed
&& common_sched_info->sched_pass_id == SCHED_RGN_PASS);
+
if (sched_pressure_p)
ira_setup_eliminable_regset ();
@@ -5567,4 +5592,73 @@ sched_emit_insn (rtx pat)
return insn;
}
+/* Get first insn that fits in the dispatch window. If none exists return
+ first insn. */
+
+static rtx
+ready_remove_first_dispatch (struct ready_list *ready)
+{
+ int i;
+ rtx insn = ready_element (ready, 0);
+
+ if (ready->n_ready == 1
+ || INSN_CODE (insn) < 0
+ || !INSN_P (insn)
+ || !active_insn_p (insn)
+ || fits_dispatch_window (insn))
+ return ready_remove_first (ready);
+
+ for (i = 1; i < ready->n_ready; i++)
+ {
+ insn = ready_element (ready, i);
+ if (INSN_CODE (insn) < 0
+ || !INSN_P (insn)
+ || !active_insn_p (insn))
+ continue;
+ if (fits_dispatch_window (insn))
+ {
+ /* Return ith element of ready. */
+ insn = ready_remove (ready, i);
+ return insn;
+ }
+ }
+
+ if (dispatch_violation ())
+ return ready_remove_first (ready);
+
+ for (i = 1; i < ready->n_ready; i++)
+ {
+ insn = ready_element (ready, i);
+ if (INSN_CODE (insn) < 0
+ || !INSN_P (insn)
+ || !active_insn_p (insn))
+ continue;
+ if (is_cmp (insn))
+ /* Return ith element of ready. */
+ return ready_remove (ready, i);
+ }
+
+ return ready_remove_first (ready);
+}
+
+/* Print status of the ready list with respect to dispach windows. */
+static void
+print_ready_dispatch (FILE *file)
+{
+ int i;
+
+ fprintf (file, "Number of ready: %d\n", ready.n_ready);
+ for (i = 0; i < ready.n_ready; i++)
+ debug_print_insn_dispatch_info (ready_element (&ready, i));
+}
+
+/* Print to STDERR the status of the ready list with respect to
+ dispach windows. */
+
+DEBUG_FUNCTION void
+debug_ready_dispatch (void)
+{
+ print_ready_dispatch (stderr);
+}
+
#endif /* INSN_SCHEDULING */
===================================================================
@@ -1078,6 +1078,10 @@ fschedule-insns2
Common Report Var(flag_schedule_insns_after_reload) Optimization
Reschedule instructions after register allocation
+fdispatch-sched
+Common Report Var(flag_dispatch_sched) Optimization
+Perform dispatch scheduling before and after register allocation
+
; This flag should be on when a target implements non-trivial
; scheduling hooks, maybe saving some information for its own sake.
; On IA64, for example, this is used for correct bundling.
===================================================================
@@ -1497,4 +1497,14 @@ extern void print_insn (char *, const_rt
extern void print_pattern (char *, const_rtx, int);
extern void print_value (char *, const_rtx, int);
+/* Functions in dispatch_sch.c. */
+extern void init_dispatch_sched (void);
+extern void add_to_dispatch_window (rtx);
+extern bool fits_dispatch_window (rtx);
+extern bool is_cmp (rtx insn);
+extern bool dispatch_violation (void);
+extern void debug_print_dispatch_window (int);
+extern void debug_print_insn_dispatch_info (rtx);
+extern void debug_ready_dispatch (void);
+
#endif /* GCC_SCHED_INT_H */
===================================================================
@@ -1203,6 +1203,7 @@ OBJS-common = \
df-scan.o \
dfp.o \
diagnostic.o \
+ dispatch-sch.o \
dojump.o \
dominance.o \
domwalk.o \
@@ -2790,6 +2791,11 @@ fold-const.o : fold-const.c $(CONFIG_H)
$(GIMPLE_H) realmpfr.h
diagnostic.o : diagnostic.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
version.h $(INPUT_H) intl.h $(DIAGNOSTIC_H) diagnostic.def
+dispatch-sch.o : dispatch-sch.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
+ $(TM_H) $(RTL_H) $(SCHED_INT_H) $(REGS_H) hard-reg-set.h \
+ $(FLAGS_H) insn-config.h $(FUNCTION_H) \
+ $(INSN_ATTR_H) $(TOPLEV_H) $(RECOG_H) $(EXCEPT_H) $(TM_P_H) \
+ $(TARGET_H) output.h $(PARAMS_H) $(DBGCNT_H) $(CFGLOOP_H) ira.h
opts.o : opts.c opts.h options.h $(TOPLEV_H) $(CONFIG_H) $(SYSTEM_H) \
coretypes.h $(TREE_H) $(TM_H) langhooks.h $(GGC_H) $(EXPR_H) $(RTL_H) \
output.h $(DIAGNOSTIC_H) $(TM_P_H) $(INSN_ATTR_H) intl.h $(TARGET_H) \
===================================================================
@@ -0,0 +1,874 @@
+/* Dispatch scheduling pass.
+ Copyright (C) 2010
+ Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Dispatch Scheduling is part of the Haifa scheduler. Some machines
+ such as AMD Bulldozer perform more efficiently if a group of
+ instructions have certain alignment and certain operation mix.
+
+ Scheduled instructions are dispatched in windows of size W. N of
+ these windows are scheduled together as a dispatch group.
+
+ There are two phases in dispatch scheduling, before and after
+ register allocation.
+
+ Dispatch scheduling is done before register allocation, to avoid
+ barriers in moving instructions cased by extra dependencies imposed
+ by GRA. It also is done after register allocation, because GRA may
+ change the instruction sequence by generating spill code or eliminating
+ register copies, etc.
+
+ The same algorithm is used before and after register allocation.
+ There is an extra path after register allocation to generate
+ alignments code. This path is done by the assembler, because the
+ exact length of instructions may not be known until then.
+
+ The main interface between dispatch scheduler and the Haifa scheduler
+ is the functions fits_dispatch_window. This function returns true
+ if the dispatch restrictions are satisfied for the scheduling candidate.
+
+ After an instruction is scheduled, the Haifa scheduler calls
+ add_to_dispatch_window to update the dispatch scheduler data base. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "toplev.h"
+#include "rtl.h"
+#include "tm_p.h"
+#include "hard-reg-set.h"
+#include "regs.h"
+#include "function.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "toplev.h"
+#include "recog.h"
+#include "sched-int.h"
+#include "target.h"
+#include "output.h"
+#include "params.h"
+#include "vecprim.h"
+#include "dbgcnt.h"
+#include "cfgloop.h"
+#include "ira.h"
+#include "cselib.h"
+
+/* The size of the dispatch window is the total number of bytes of
+ object code allowed in a window. */
+#define DISPATCH_WINDOW_SIZE 16
+
+/* Number of dispatch windows considered for scheduling. */
+#define MAX_DISPATCH_WINDOWS 3
+
+/* Maximum number of instructions in a window. */
+#define MAX_INSN 4
+
+/* Maximum number of immediate operands in a window. */
+#define MAX_IMM 4
+
+/* Maximum number of immediate bits allowed in a window. */
+#define MAX_IMM_SIZE 128
+
+/* Maximum number of 32 bit immediates allowed in a window. */
+#define MAX_IMM_32 4
+
+/* Maximum number of 64 bit immediates allowed in a window. */
+#define MAX_IMM_64 2
+
+/* Maximum total of loads or prefetches allowed in a window. */
+#define MAX_LOAD 2
+
+/* Maximum total of stores allowed in a window. */
+#define MAX_STORE 1
+
+#undef BIG
+#define BIG 100
+
+
+/* Dispatch groups. Istructions that affect the mix in a dispatch window. */
+enum dispatch_group {
+ disp_no_group = 0,
+ disp_load,
+ disp_store,
+ disp_load_store,
+ disp_prefetch,
+ disp_imm,
+ disp_imm_32,
+ disp_imm_64,
+ disp_branch,
+ disp_cmp,
+ disp_jcc,
+ disp_last
+};
+
+/* Number of allowable groups in a dispatch window. It is an array
+ indexed by dispatch_group enum. 100 is used as a big number,
+ because the number of these kind of operations does not have any
+ effect in dispatch window, but we need them for other reasons in
+ the table. */
+static int num_allowable_groups[disp_last] = {
+ 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
+};
+
+char group_name[disp_last + 1][16] = {
+ "disp_no_group", "disp_load", "disp_store", "disp_load_store",
+ "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
+ "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
+};
+
+/* Instruction path. */
+enum insn_path {
+ no_path = 0,
+ path_single, /* Single micro op. */
+ path_double, /* Double micro op. */
+ path_multi, /* Instructions with more than 2 micro op.. */
+ last_path
+};
+
+/* sched_insn_info defines a window to the instructions scheduled in
+ the basic block. It contains a pointer to the insn_info table and
+ the instruction scheduled.
+
+ Windows are allocated for each basic block and are linked
+ together. */
+typedef struct sched_insn_info_s {
+ rtx insn;
+ enum dispatch_group group;
+ enum insn_path path;
+ int byte_len;
+ int imm_bytes;
+} sched_insn_info;
+
+/* Linked list of dispatch windows. This is a two way list of
+ dispatch windows of a basic block. It contains information about
+ the number of uops in the window and the total number of
+ instructions and of bytes in the object code for this dispatch
+ window. */
+typedef struct dispatch_windows_s {
+ int num_insn; /* Number of insn in the window. */
+ int num_uops; /* Number of uops in the window. */
+ int window_size; /* Number of bytes in the window. */
+ int window_num; /* Window number between 0 or 1. */
+ int num_imm; /* Number of immediates in an insn. */
+ int num_imm_32; /* Number of 32 bit immediates in an insn. */
+ int num_imm_64; /* Number of 64 bit immediates in an insn. */
+ int imm_size; /* Total immediates in the window. */
+ int num_loads; /* Total memory loads in the window. */
+ int num_stores; /* Total memory stores in the window. */
+ int violation; /* Violation exists in window. */
+ sched_insn_info *window; /* Pointer to the window. */
+ struct dispatch_windows_s *next;
+ struct dispatch_windows_s *prev;
+} dispatch_windows;
+
+static dispatch_windows *dispatch_window_list;
+static dispatch_windows *dispatch_window_list1;
+
+/* Returns decode type on an AMDFAM10 machine which can be
+ "DIRECT", "DOUBLE", "VECTOR" which are decoded
+ to 1, 2 or more than 2 micro-ops respectively. */
+static enum attr_amdfam10_decode
+dispatch_group_amdfam10_decode (rtx insn)
+{
+ enum attr_amdfam10_decode amdfam10_decode;
+ amdfam10_decode = get_attr_amdfam10_decode (insn);
+ return amdfam10_decode;
+}
+
+/* Get dispatch group of insn. */
+
+static enum dispatch_group
+get_mem_group (rtx insn)
+{
+ enum attr_memory memory;
+
+ if (INSN_CODE (insn) < 0)
+ return disp_no_group;
+ memory = get_attr_memory (insn);
+ if (memory == MEMORY_STORE)
+ return disp_store;
+
+ if (memory == MEMORY_LOAD)
+ return disp_load;
+
+ if (memory == MEMORY_BOTH)
+ return disp_load_store;
+
+ return disp_no_group;
+}
+
+/* Return true if insn is a compare instruction. */
+
+bool
+is_cmp (rtx insn)
+{
+ enum attr_type type;
+ type = get_attr_type (insn);
+ return (type == TYPE_TEST || type == TYPE_ICMP || type == TYPE_FCMP);
+}
+
+/* Return true if a dispatch violation encountered. */
+
+bool
+dispatch_violation (void)
+{
+ if (dispatch_window_list->next)
+ return dispatch_window_list->next->violation;
+ return dispatch_window_list->violation;
+}
+
+/* Return true if insn is a branch instruction. */
+
+static bool
+is_branch (rtx insn)
+{
+ return (CALL_P (insn) || JUMP_P (insn));
+}
+
+static bool
+is_prefetch (rtx insn)
+{
+ return ((strcmp (GET_RTX_NAME (GET_CODE (insn)), "prefetch_sse") == 0)
+ || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+ "prefetch_sse_rex") == 0)
+ || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+ "prefetch_3dnow") == 0)
+ || (strcmp (GET_RTX_NAME (GET_CODE (insn)),
+ "prefetch_3dnow_rex") == 0));
+}
+
+/* This function initializes a dispatch window and the list container holding a
+ pointer to the window. */
+
+static void
+init_window (int window_num)
+{
+ int i;
+ dispatch_windows *new_list;
+
+ if (window_num == 0)
+ new_list = dispatch_window_list;
+ else
+ new_list = dispatch_window_list1;
+
+ new_list->num_insn = 0;
+ new_list->num_uops = 0;
+ new_list->window_size = 0;
+ new_list->next = NULL;
+ new_list->prev = NULL;
+ new_list->window_num = window_num;
+ new_list->num_imm = 0;
+ new_list->num_imm_32 = 0;
+ new_list->num_imm_64 = 0;
+ new_list->imm_size = 0;
+ new_list->num_loads = 0;
+ new_list->num_stores = 0;
+ new_list->violation = false;
+
+ for (i = 0; i < MAX_INSN; i++)
+ {
+ new_list->window[i].insn = NULL;
+ new_list->window[i].group = disp_no_group;
+ new_list->window[i].path = no_path;
+ new_list->window[i].byte_len = 0;
+ new_list->window[i].imm_bytes = 0;
+ }
+ return;
+}
+
+/* This function allocates and initializes a dispatch window and the
+ list container holding a pointer to the window. */
+
+static dispatch_windows *
+allocate_window (void)
+{
+ dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
+ new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
+
+ return new_list;
+}
+
+/* This routine initializes the dispatch scheduling information. It
+ initiates building dispatch scheduler tables and constructs the
+ first dispatch window. */
+
+void
+init_dispatch_sched (void)
+{
+ /* Allocate a dispatch list and a window. */
+ dispatch_window_list = allocate_window ();
+ dispatch_window_list1 = allocate_window ();
+ init_window (0);
+ init_window (1);
+}
+
+
+/* This function returns true if a branch is detected. End of a basic block
+ does not have to be a branch, but here we assume only branches end a
+ window. */
+
+static bool
+is_end_basic_block (enum dispatch_group group)
+{
+ return group == disp_branch;
+}
+
+/* This function is called when the end of a window processing is reached. */
+
+static void
+process_end_window (void)
+{
+ gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
+ if (dispatch_window_list->next)
+ {
+ gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
+ gcc_assert (dispatch_window_list->window_size + dispatch_window_list1->window_size <= 48);
+ init_window (1);
+ }
+ init_window (0);
+}
+
+/* Allocates a new dispatch window and adds it to WINDOW_LIST.
+ WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
+ for 48 bytes of instructions. Note that these windows are not dispatch
+ windows that their sizes are DISPATCH_WINDOW_SIZE. */
+
+static dispatch_windows *
+allocate_next_window (int window_num)
+{
+ if (window_num == 0)
+ {
+ if (dispatch_window_list->next)
+ init_window (1);
+ init_window (0);
+ return dispatch_window_list;
+ }
+
+ dispatch_window_list->next = dispatch_window_list1;
+ dispatch_window_list1->prev = dispatch_window_list;
+
+ return dispatch_window_list1;
+}
+
+/* Recursive function returning total sizes of immediate operands of an
+ instruction along with number of corresponding immediate-operands. */
+
+static void
+find_con (const_rtx in_rtx, int *imm, int *imm32, int *imm64)
+{
+ int i = 0;
+ int j;
+ const char *format_ptr;
+ enum rtx_code code;
+
+ if (in_rtx == 0)
+ return;
+
+ code = GET_CODE (in_rtx);
+ if (code == CONST_INT)
+ {
+ (*imm)++;
+ (*imm32)++;
+ }
+ else if (code == CONST_DOUBLE)
+ {
+ (*imm)++;
+ (*imm64)++;
+ }
+ else if (GET_CODE (in_rtx) == CODE_LABEL)
+ {
+ if (LABEL_KIND (in_rtx) == LABEL_NORMAL)
+ {
+ (*imm)++;
+ (*imm32)++;
+ }
+ }
+
+ if (GET_CODE (in_rtx) == VAR_LOCATION)
+ {
+ find_con (PAT_VAR_LOCATION_LOC (in_rtx), imm, imm32, imm64);
+ i = GET_RTX_LENGTH (VAR_LOCATION);
+ }
+
+ if (GET_CODE (in_rtx) == CONST_DOUBLE && FLOAT_MODE_P (GET_MODE (in_rtx)))
+ i = 5;
+
+ format_ptr = GET_RTX_FORMAT (GET_CODE (in_rtx)) + i;
+ for (; i < GET_RTX_LENGTH (GET_CODE (in_rtx)); i++)
+ switch (*format_ptr++)
+ {
+ case 'e':
+ find_con (XEXP (in_rtx, i), imm, imm32, imm64);
+ break;
+
+ case 'E':
+ case 'V':
+ if (XVEC (in_rtx, i) != NULL)
+ for (j = 0; j < XVECLEN (in_rtx, i); j++)
+ find_con (XVECEXP (in_rtx, i, j), imm, imm32, imm64);
+ break;
+ }
+}
+
+/* Return total sizes of immediate operands of an instruction along with number
+ of corresponding immediate-operands. */
+
+static int
+get_num_imm (rtx insn, int *imm, int *imm32, int *imm64)
+{
+ *imm = 0;
+ *imm32 = 0;
+ *imm64 = 0;
+ find_con (insn, imm, imm32, imm64);
+ return *imm32 * 4 + *imm64 * 8;
+}
+
+/* This function indicates if an operand of an instruction is an
+ immediate. */
+
+static bool
+has_imm (rtx insn)
+{
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (insn)
+ return get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ return false;
+}
+
+/* Get bytes length of INSN.
+ This function is very similar to the static function min_insn_size
+ in i386.c. The main difference is the use of get_attr_length. */
+
+static int
+get_insn_length (rtx insn)
+{
+ int len, l = 0;
+
+ if (!INSN_P (insn) || !active_insn_p (insn))
+ return 0;
+
+ len = get_attr_length (insn);
+ /* For normal instructions we rely on get_attr_length being exact,
+ with a few exceptions. */
+ if (!JUMP_P (insn))
+ {
+ enum attr_type type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MULTI:
+ if (GET_CODE (PATTERN (insn)) == ASM_INPUT
+ || asm_noperands (PATTERN (insn)) >= 0)
+ return 0;
+ break;
+ case TYPE_OTHER:
+ case TYPE_FCMP:
+ break;
+ default:
+ /* Otherwise trust get_attr_length. */
+ return len;
+ }
+
+ l = get_attr_length_address (insn);
+ if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
+ l = 4;
+ }
+ if (l)
+ return 1+l;
+ else
+ return 2;
+}
+
+
+/* Return single or double path for instructions. */
+
+static enum insn_path
+get_insn_path (rtx insn)
+{
+ enum attr_amdfam10_decode path = dispatch_group_amdfam10_decode (insn);
+
+ if ((int)path == 0)
+ return path_single;
+
+ if ((int)path == 1)
+ return path_double;
+
+ return path_multi;
+}
+
+/* Return insn dispatch group. */
+
+static enum
+dispatch_group get_insn_group (rtx insn)
+{
+ enum dispatch_group group = get_mem_group (insn);
+ if (group)
+ return group;
+
+ if (is_branch (insn))
+ return disp_branch;
+
+ if (is_cmp (insn))
+ return disp_cmp;
+
+ if (has_imm (insn))
+ return disp_imm;
+
+ if (is_prefetch (insn))
+ return disp_prefetch;
+
+ return disp_no_group;
+}
+
+/* Add an instruction INSN with NUM_UOPS micro-operations to the
+ dispatch window WINDOW_LIST. */
+
+static void
+add_insn_window (rtx insn, dispatch_windows *window_list, int num_uops)
+{
+ int byte_len = get_insn_length (insn);
+ int num_insn = window_list->num_insn;
+ int imm_size;
+ sched_insn_info *window = window_list->window;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (!window_list->violation && group != disp_cmp
+ && !fits_dispatch_window (insn))
+ window_list->violation = true;
+
+ imm_size = get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ /* Initialize window with new instruction. */
+ window[num_insn].insn = insn;
+ window[num_insn].byte_len = byte_len;
+ window[num_insn].group = group;
+ window[num_insn].path = path;
+ window[num_insn].imm_bytes = imm_size;
+
+ window_list->window_size += byte_len;
+ window_list->num_insn = num_insn + 1;
+ window_list->num_uops = window_list->num_uops + num_uops;
+ window_list->imm_size += imm_size;
+ window_list->num_imm += num_imm_operand;
+ window_list->num_imm_32 += num_imm32_operand;
+ window_list->num_imm_64 += num_imm64_operand;
+
+ if (group == disp_store)
+ window_list->num_stores += 1;
+ else if (group == disp_load
+ || group == disp_prefetch)
+ window_list->num_loads += 1;
+ else if (group == disp_load_store)
+ {
+ window_list->num_stores += 1;
+ window_list->num_loads += 1;
+ }
+}
+
+/* Count number of GROUP restricted instructions in a dispatch
+ window WINDOW_LIST. */
+
+static int
+count_num_restricted (rtx insn, dispatch_windows *window_list)
+{
+ enum dispatch_group group = get_insn_group (insn);
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (group == disp_no_group)
+ return 0;
+
+ if (group == disp_imm)
+ {
+ imm_size = get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ if (window_list->imm_size + imm_size > MAX_IMM_SIZE
+ || num_imm_operand + window_list->num_imm > MAX_IMM
+ || (num_imm32_operand > 0
+ && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
+ || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
+ || (num_imm64_operand > 0
+ && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
+ || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
+ || (window_list->imm_size + imm_size == MAX_IMM_SIZE
+ && num_imm64_operand > 0
+ && ((window_list->num_imm_64 > 0
+ && window_list->num_insn >= 2)
+ || window_list->num_insn >= 3)))
+ return BIG;
+
+ return 1;
+ }
+
+ if ((group == disp_load_store
+ && (window_list->num_loads >= MAX_LOAD
+ || window_list->num_stores >= MAX_STORE))
+ || ((group == disp_load
+ || group == disp_prefetch)
+ && window_list->num_loads >= MAX_LOAD)
+ || (group == disp_store
+ && window_list->num_stores >= MAX_STORE))
+ return BIG;
+
+ return 1;
+}
+
+/* Adds a scheduled instruction, INSN, to the current dispatch window.
+ If the total bytes of instructions or the number of instructions in
+ the window exceed allowable, it allocates a new window. */
+
+void
+add_to_dispatch_window (rtx insn)
+{
+ int byte_len;
+ dispatch_windows *window_list;
+ dispatch_windows *next_list;
+ dispatch_windows *window0_list;
+ enum insn_path path;
+ enum dispatch_group insn_group;
+ bool insn_fits;
+ int num_insn;
+ int num_uops;
+ int window_num;
+ int insn_num_uops;
+ int sum;
+
+ if (INSN_CODE (insn) < 0)
+ return;
+
+ byte_len = get_insn_length (insn);
+ window_list = dispatch_window_list;
+ next_list = window_list->next;
+ path = get_insn_path (insn);
+ insn_group = get_insn_group (insn);
+
+ /* Get the last dispatch window. */
+ if (next_list)
+ window_list = dispatch_window_list->next;
+
+ if (path == path_single)
+ insn_num_uops = 1;
+ else if (path == path_double)
+ insn_num_uops = 2;
+ else
+ insn_num_uops = (int) path;
+
+ /* If current window is full, get a new window.
+ Window number zero is full, if MAX_INSN uops are scheduled in it.
+ Window number one is full, if window zero's bytes plus window
+ one's bytes is 32, or if the bytes of the new instruction added
+ to the total makes it greater than 48, or it has already MAX_INSN
+ instructions in it. */
+ num_insn = window_list->num_insn;
+ num_uops = window_list->num_uops;
+ window_num = window_list->window_num;
+ insn_fits = fits_dispatch_window (insn);
+
+ if (num_insn >= MAX_INSN
+ || num_uops + insn_num_uops > MAX_INSN
+ || !(insn_fits))
+ {
+ window_num = ~window_num & 1;
+ window_list = allocate_next_window (window_num);
+ }
+
+ if (window_num == 0)
+ {
+ add_insn_window (insn, window_list, insn_num_uops);
+ if (window_list->num_insn >= MAX_INSN
+ && insn_group == disp_branch)
+ {
+ process_end_window ();
+ return;
+ }
+ }
+ else if (window_num == 1)
+ {
+ window0_list = window_list->prev;
+ sum = window0_list->window_size + window_list->window_size;
+ if (sum == 32
+ || (byte_len + sum) >= 48)
+ {
+ process_end_window ();
+ window_list = dispatch_window_list;
+ }
+
+ add_insn_window (insn, window_list, insn_num_uops);
+ }
+ else
+ gcc_unreachable ();
+
+ if (is_end_basic_block (insn_group))
+ {
+ /* End of basic block is reached do end-basic-block process. */
+ process_end_window ();
+ return;
+ }
+}
+
+/* This function returns true if insn satisfies dispatch rules on the
+ last window scheduled. */
+
+bool
+fits_dispatch_window (rtx insn)
+{
+ dispatch_windows *window_list = dispatch_window_list;
+ dispatch_windows *window_list_next = dispatch_window_list->next;
+ int num_restrict;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int sum;
+
+ /* Make disp_cmp and disp_jcc get scheduled at the latest. These
+ instructions should be given the lowest priority in the
+ scheduling process in Haifa scheduler to make sure they will be
+ scheduled in the same dispatch window as the refrence to them. */
+ if (group == disp_jcc || group == disp_cmp)
+ return false;
+
+ /* Check nonrestricted. */
+ if (group == disp_no_group || group == disp_branch)
+ return true;
+
+ /* Get last dispatch window. */
+ if (window_list_next)
+ window_list = window_list_next;
+
+ if (window_list->window_num == 1)
+ {
+ sum = window_list->prev->window_size + window_list->window_size;
+ if (sum == 32
+ || (get_insn_length (insn) + sum) >= 48)
+ /* Window 1 is full. Go for next window. */
+ return true;
+ }
+
+ num_restrict = count_num_restricted (insn, window_list);
+
+ if (num_restrict > num_allowable_groups[group])
+ return false;
+
+ /* See if it fits in the first window. */
+ if (window_list->window_num == 0)
+ {
+ /* The first widow should have only single and double path
+ uops. */
+ if (path == path_double
+ && (window_list->num_uops + 2) > MAX_INSN)
+ return false;
+ else if (path != path_single)
+ return false;
+ }
+ return true;
+}
+
+/* Print a dispatch window. */
+
+static void
+print_dispatch_window (FILE *file, int window_num)
+{
+ dispatch_windows *list;
+ int i;
+
+ if (window_num == 0)
+ list = dispatch_window_list;
+ else
+ list = dispatch_window_list1;
+
+ fprintf (file, "Window #%d:\n", list->window_num);
+ fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
+ list->num_insn, list->num_uops, list->window_size);
+ fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d,\
+ imm_size = %d\n",
+ list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
+
+ fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
+ list->num_stores);
+ fprintf (file, " insn info:\n");
+
+ for (i = 0; i < MAX_INSN; i++)
+ {
+ if (!list->window[i].insn)
+ break;
+ fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d\
+ byte_len[%d] = %d, imm_bytes[%d] = %d\n",
+ i, group_name[list->window[i].group],
+ i, (void *)list->window[i].insn,
+ i, list->window[i].path,
+ i, list->window[i].byte_len,
+ i, list->window[i].imm_bytes);
+ }
+}
+
+/* Print to stderr a dispatch window. */
+
+DEBUG_FUNCTION void
+debug_print_dispatch_window (int window_num)
+{
+ print_dispatch_window (stderr, window_num);
+}
+
+/* Print insn dispatch information. */
+
+static void
+print_insn_dispatch_info (FILE *file, rtx insn)
+{
+ int byte_len;
+ enum insn_path path;
+ enum dispatch_group group;
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (INSN_CODE (insn) < 0)
+ return;
+
+ byte_len = get_insn_length (insn);
+ path = get_insn_path (insn);
+ group = get_insn_group (insn);
+ imm_size = get_num_imm (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+
+ fprintf (file, " insn info:\n");
+ fprintf (file, " group = %s, path = %d, byte_len = %d\n", group_name[group],
+ path, byte_len);
+ fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d,\
+ imm_size = %d\n",
+ num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
+}
+
+/* Print insn dispatch information. */
+
+DEBUG_FUNCTION void
+debug_print_insn_dispatch_info (rtx insn)
+{
+ print_insn_dispatch_info (stderr, insn);
+}
===================================================================
@@ -3573,6 +3573,9 @@ override_options (bool main_args_p)
if (main_args_p)
target_option_default_node = target_option_current_node
= build_target_option_node ();
+
+ if (ix86_tune == PROCESSOR_BDVER1)
+ flag_dispatch_sched = 1;
}
/* Update register usage after having seen the compiler flags. */