* config/arm/arm.c (arm_address_offset_is_imm): New.
(arm_early_store_addr_dep): New.
(arm_early_load_addr_dep): New.
* config/arm/arm-protos.h (arm_early_store_addr_dep): Declare.
(arm_early_load_addr_dep): Declare.
(arm_address_offset_is_imm): Declare.
* config/arm/cortex-m4.md: New file.
* config/arm/cortex-m4f.md: New file.
* config/arm/arm-cores.def: Add cortex-m4f.
* config/arm/arm-tune.md: Regenerate.
* config/arm/arm.md: Include cortex-m4.md and cortex-m4f.md.
(attr tune_cortexm4): Define.
(attr generic_sched): Use tune_cortexm4.
(attr generic_vfp): Use tune_cortexm4.
* doc/invoke.texi (ARM Options): Document cortex-m4f.
===================================================================
@@ -10122,8 +10122,8 @@ assembly code. Permissible names are: @
@samp{arm1136j-s}, @samp{arm1136jf-s}, @samp{mpcore}, @samp{mpcorenovfp},
@samp{arm1156t2-s}, @samp{arm1156t2f-s}, @samp{arm1176jz-s}, @samp{arm1176jzf-s},
@samp{cortex-a5}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a15},
-@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m3},
-@samp{cortex-m1},
+@samp{cortex-r4}, @samp{cortex-r4f}, @samp{cortex-m4}, @samp{cortex-m4f},
+@samp{cortex-m3}, @samp{cortex-m1},
@samp{cortex-m0},
@samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312}.
===================================================================
@@ -13333,6 +13333,34 @@ arm_attr_length_move_neon (rtx insn)
return 4;
}
+/* Return nonzero if the memory address of INSN (operand 0) is a register or
+   a register plus a constant-integer offset.  Otherwise, return zero.  */
+
+int
+arm_address_offset_is_imm (rtx insn)
+{
+ rtx mem, addr;
+
+ extract_insn_cached (insn);
+
+ if (REG_P (recog_data.operand[0]))
+ return 0;
+
+ mem = recog_data.operand[0];
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+
+ if (GET_CODE (addr) == REG
+ || (GET_CODE (addr) == PLUS
+ && GET_CODE (XEXP (addr, 0)) == REG
+ && GET_CODE (XEXP (addr, 1)) == CONST_INT))
+ return 1;
+ else
+ return 0;
+}
+
/* Output an ADD r, s, #n where n may be too big for one instruction.
If adding zero to one register, output nothing. */
const char *
@@ -21491,6 +21519,38 @@ arm_no_early_store_addr_dep (rtx produce
return !reg_overlap_mentioned_p (value, addr);
}
+/* Return nonzero if the CONSUMER instruction (a store) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_store_addr_dep (rtx producer, rtx consumer)
+{
+ return !arm_no_early_store_addr_dep (producer, consumer);
+}
+
+/* Return nonzero if the CONSUMER instruction (a load) does need
+ PRODUCER's value to calculate the address. */
+
+int
+arm_early_load_addr_dep (rtx producer, rtx consumer)
+{
+ rtx value = PATTERN (producer);
+ rtx addr = PATTERN (consumer);
+
+ if (GET_CODE (value) == COND_EXEC)
+ value = COND_EXEC_CODE (value);
+ if (GET_CODE (value) == PARALLEL)
+ value = XVECEXP (value, 0, 0);
+ value = XEXP (value, 0);
+ if (GET_CODE (addr) == COND_EXEC)
+ addr = COND_EXEC_CODE (addr);
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+ addr = XEXP (addr, 1);
+
+ return reg_overlap_mentioned_p (value, addr);
+}
+
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
have an early register shift value or amount dependency on the
result of PRODUCER. */
===================================================================
@@ -0,0 +1,111 @@
+;; ARM Cortex-M4 pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "cortex_m4")
+
+;; We model the pipelining of LDR instructions by using two artificial units.
+
+(define_cpu_unit "cortex_m4_a" "cortex_m4")
+
+(define_cpu_unit "cortex_m4_b" "cortex_m4")
+
+(define_reservation "cortex_m4_ex" "cortex_m4_a+cortex_m4_b")
+
+;; ALU and multiply is one cycle.
+(define_insn_reservation "cortex_m4_alu" 1
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "alu,alu_shift,alu_shift_reg,mult"))
+ "cortex_m4_ex")
+
+;; Byte, half-word and word load is two cycles.
+(define_insn_reservation "cortex_m4_load1" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "load_byte,load1"))
+ "cortex_m4_a, cortex_m4_b")
+
+;; str rx, [ry, #imm] is always one cycle.
+(define_insn_reservation "cortex_m4_store1_1" 1
+ (and (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "store1"))
+ (ne (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
+ "cortex_m4_a")
+
+;; Any other store is two cycles.
+(define_insn_reservation "cortex_m4_store1_2" 2
+ (and (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "store1"))
+ (eq (symbol_ref ("arm_address_offset_is_imm (insn)")) (const_int 0)))
+ "cortex_m4_a*2")
+
+(define_insn_reservation "cortex_m4_load2" 3
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "load2"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_store2" 3
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "store2"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_load3" 4
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "load3"))
+ "cortex_m4_ex*4")
+
+(define_insn_reservation "cortex_m4_store3" 4
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "store3"))
+ "cortex_m4_ex*4")
+
+(define_insn_reservation "cortex_m4_load4" 5
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "load4"))
+ "cortex_m4_ex*5")
+
+(define_insn_reservation "cortex_m4_store4" 5
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "store4"))
+ "cortex_m4_ex*5")
+
+;; If the address of load or store depends on the result of the preceding
+;; instruction, the latency is increased by one.
+
+(define_bypass 2 "cortex_m4_alu"
+ "cortex_m4_load1"
+ "arm_early_load_addr_dep")
+
+(define_bypass 2 "cortex_m4_alu"
+ "cortex_m4_store1_1,cortex_m4_store1_2"
+ "arm_early_store_addr_dep")
+
+(define_insn_reservation "cortex_m4_branch" 3
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "branch"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_call" 3
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "call"))
+ "cortex_m4_ex*3")
+
+(define_insn_reservation "cortex_m4_block" 1
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "block"))
+ "cortex_m4_ex")
===================================================================
@@ -125,6 +125,7 @@ ARM_CORE("cortex-a15", cortexa15, 7A,
ARM_CORE("cortex-r4", cortexr4, 7R, FL_LDSCHED, 9e)
ARM_CORE("cortex-r4f", cortexr4f, 7R, FL_LDSCHED, 9e)
ARM_CORE("cortex-m4", cortexm4, 7EM, FL_LDSCHED, 9e)
+ARM_CORE("cortex-m4f", cortexm4f, 7EM, FL_LDSCHED, 9e)
ARM_CORE("cortex-m3", cortexm3, 7M, FL_LDSCHED, 9e)
ARM_CORE("cortex-m1", cortexm1, 6M, FL_LDSCHED, 9e)
ARM_CORE("cortex-m0", cortexm0, 6M, FL_LDSCHED, 9e)
===================================================================
@@ -0,0 +1,111 @@
+;; ARM Cortex-M4F VFP pipeline description
+;; Copyright (C) 2010 Free Software Foundation, Inc.
+;; Contributed by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Use an artificial unit to model the FPU.
+(define_cpu_unit "cortex_m4_v" "cortex_m4")
+
+(define_reservation "cortex_m4_ex_v" "cortex_m4_ex+cortex_m4_v")
+
+;; Integer instructions following VDIV or VSQRT complete out-of-order.
+(define_insn_reservation "cortex_m4_fdivs" 15
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "fdivs"))
+ "cortex_m4_ex_v,cortex_m4_v*13")
+
+(define_insn_reservation "cortex_m4_vmov_1" 1
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "fcpys,fconsts"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_vmov_2" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_2_r,r_2_f"))
+ "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_fmuls" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "fmuls"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fmacs" 4
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "fmacs"))
+ "cortex_m4_ex_v*3")
+
+(define_insn_reservation "cortex_m4_ffariths" 1
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "ffariths"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fadds" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "fadds"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_fcmps" 1
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "fcmps"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_flag" 1
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_flag"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_cvt" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_cvt"))
+ "cortex_m4_ex_v")
+
+(define_insn_reservation "cortex_m4_f_load" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_load"))
+ "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_f_store" 2
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_store"))
+ "cortex_m4_ex_v*2")
+
+(define_insn_reservation "cortex_m4_f_loadd" 3
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_loadd"))
+ "cortex_m4_ex_v*3")
+
+(define_insn_reservation "cortex_m4_f_stored" 3
+ (and (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "type" "f_stored"))
+ "cortex_m4_ex_v*3")
+
+;; MAC instructions consume their addend one cycle later. If the result
+;; of an arithmetic instruction is consumed as the addend of the following
+;; MAC instruction, the latency can be decreased by one.
+
+(define_bypass 1 "cortex_m4_fadds,cortex_m4_fmuls,cortex_m4_f_cvt"
+ "cortex_m4_fmacs"
+ "arm_no_early_mul_dep")
+
+(define_bypass 3 "cortex_m4_fmacs"
+ "cortex_m4_fmacs"
+ "arm_no_early_mul_dep")
+
+(define_bypass 14 "cortex_m4_fdivs"
+ "cortex_m4_fmacs"
+ "arm_no_early_mul_dep")
===================================================================
@@ -88,6 +88,8 @@ extern int arm_coproc_mem_operand (rtx,
extern int neon_vector_mem_operand (rtx, int);
extern int neon_struct_mem_operand (rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
+extern int arm_early_store_addr_dep (rtx, rtx);
+extern int arm_early_load_addr_dep (rtx, rtx);
extern int arm_no_early_alu_shift_dep (rtx, rtx);
extern int arm_no_early_alu_shift_value_dep (rtx, rtx);
extern int arm_no_early_mul_dep (rtx, rtx);
@@ -129,6 +131,7 @@ extern const char *output_move_quad (rtx
extern const char *output_move_vfp (rtx *operands);
extern const char *output_move_neon (rtx *operands);
extern int arm_attr_length_move_neon (rtx);
+extern int arm_address_offset_is_imm (rtx);
extern const char *output_add_immediate (rtx *);
extern const char *arithmetic_instr (rtx, int);
extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
===================================================================
@@ -493,19 +493,27 @@
(const_string "yes")
(const_string "no"))))
+(define_attr "tune_cortexm4" "yes,no"
+ (const (if_then_else
+ (eq_attr "tune" "cortexm4,cortexm4f")
+ (const_string "yes")
+ (const_string "no"))))
+
;; True if the generic scheduling description should be used.
(define_attr "generic_sched" "yes,no"
- (const (if_then_else
+ (const (if_then_else
(ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa5,cortexa8,cortexa9")
- (eq_attr "tune_cortexr4" "yes"))
+ (eq_attr "tune_cortexm4" "yes")
+ (eq_attr "tune_cortexr4" "yes"))
(const_string "no")
(const_string "yes"))))
(define_attr "generic_vfp" "yes,no"
(const (if_then_else
(and (eq_attr "fpu" "vfp")
- (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9")
+ (eq_attr "tune" "!arm1020e,arm1022e,cortexa5,cortexa8,cortexa9,cortexm4")
+ (eq_attr "tune_cortexm4" "no")
(eq_attr "tune_cortexr4" "no"))
(const_string "yes")
(const_string "no"))))
@@ -520,6 +528,8 @@
(include "cortex-a9.md")
(include "cortex-r4.md")
(include "cortex-r4f.md")
+(include "cortex-m4.md")
+(include "cortex-m4f.md")
(include "vfp11.md")