===================================================================
@@ -1,5 +1,5 @@
/* Subroutines used for code generation on IBM RS/6000.
- Copyright (C) 1991-2013 Free Software Foundation, Inc.
+ Copyright (C) 1991-2014 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
@@ -56,6 +56,7 @@
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
+#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
@@ -1563,6 +1564,9 @@ static const struct attribute_spec rs600
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rs6000_lra_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate
@@ -6242,7 +6246,7 @@ rs6000_legitimate_offset_address_p (enum
return false;
if (!reg_offset_addressing_ok_p (mode))
return virtual_stack_registers_memory_p (x);
- if (legitimate_constant_pool_address_p (x, mode, strict))
+ if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
return true;
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
return false;
@@ -6383,9 +6387,21 @@ legitimate_lo_sum_address_p (enum machin
if (TARGET_ELF || TARGET_MACHO)
{
+ bool large_toc_ok;
+
if (DEFAULT_ABI == ABI_V4 && flag_pic)
return false;
- if (TARGET_TOC)
+ /* LRA don't use LEGITIMIZE_RELOAD_ADDRESS as it usually calls
+ push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
+ recognizes some LO_SUM addresses as valid although this
+ function says opposite. In most cases, LRA through different
+ transformations can generate correct code for address reloads.
+ It can not manage only some LO_SUM cases. So we need to add
+ code analogous to one in rs6000_legitimize_reload_address for
+ LOW_SUM here saying that some addresses are still valid. */
+ large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
+ && small_toc_ref (x, VOIDmode));
+ if (TARGET_TOC && ! large_toc_ok)
return false;
if (GET_MODE_NUNITS (mode) != 1)
return false;
@@ -6395,7 +6411,7 @@ legitimate_lo_sum_address_p (enum machin
&& (mode == DFmode || mode == DDmode)))
return false;
- return CONSTANT_P (x);
+ return CONSTANT_P (x) || large_toc_ok;
}
return false;
@@ -7106,7 +7122,6 @@ use_toc_relative_ref (rtx sym)
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
get_pool_mode (sym)))
|| (TARGET_CMODEL == CMODEL_MEDIUM
- && !CONSTANT_POOL_ADDRESS_P (sym)
&& SYMBOL_REF_LOCAL_P (sym)));
}
@@ -7394,7 +7409,8 @@ rs6000_legitimate_address_p (enum machin
if (reg_offset_p && legitimate_small_data_p (mode, x))
return 1;
if (reg_offset_p
- && legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
+ && legitimate_constant_pool_address_p (x, mode,
+ reg_ok_strict || lra_in_progress))
return 1;
/* For TImode, if we have load/store quad and TImode in VSX registers, only
allow register indirect addresses. This will allow the values to go in
@@ -7680,6 +7696,7 @@ rs6000_conditional_register_usage (void)
fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
}
}
+
/* Try to output insns to set TARGET equal to the constant C if it can
be done in less than N insns. Do all computations in MODE.
@@ -8112,6 +8129,68 @@ rs6000_emit_move (rtx dest, rtx source,
cfun->machine->sdmode_stack_slot =
eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
+
+ if (lra_in_progress
+ && mode == SDmode
+ && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[0])) == NO_REGS
+ && (REG_P (operands[1])
+ || (GET_CODE (operands[1]) == SUBREG
+ && REG_P (SUBREG_REG (operands[1])))))
+ {
+ int regno = REGNO (GET_CODE (operands[1]) == SUBREG
+ ? SUBREG_REG (operands[1]) : operands[1]);
+ enum reg_class cl;
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ gcc_assert (cl != NO_REGS);
+ regno = ira_class_hard_regs[cl][0];
+ }
+ if (FP_REGNO_P (regno))
+ {
+ if (GET_MODE (operands[0]) != DDmode)
+ operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
+ emit_insn (gen_movsd_store (operands[0], operands[1]));
+ }
+ else if (INT_REGNO_P (regno))
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+ else
+ gcc_unreachable();
+ return;
+ }
+ if (lra_in_progress
+ && mode == SDmode
+ && (REG_P (operands[0])
+ || (GET_CODE (operands[0]) == SUBREG
+ && REG_P (SUBREG_REG (operands[0]))))
+ && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
+ && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
+ {
+ int regno = REGNO (GET_CODE (operands[0]) == SUBREG
+ ? SUBREG_REG (operands[0]) : operands[0]);
+ enum reg_class cl;
+
+ if (regno >= FIRST_PSEUDO_REGISTER)
+ {
+ cl = reg_preferred_class (regno);
+ gcc_assert (cl != NO_REGS);
+ regno = ira_class_hard_regs[cl][0];
+ }
+ if (FP_REGNO_P (regno))
+ {
+ if (GET_MODE (operands[1]) != DDmode)
+ operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
+ emit_insn (gen_movsd_load (operands[0], operands[1]));
+ }
+ else if (INT_REGNO_P (regno))
+ emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
+ else
+ gcc_unreachable();
+ return;
+ }
+
if (reload_in_progress
&& mode == SDmode
&& cfun->machine->sdmode_stack_slot != NULL_RTX
@@ -15501,6 +15580,17 @@ rs6000_secondary_memory_needed_rtx (enum
return ret;
}
+/* Return the mode to be used for memory when a secondary memory
+ location is needed. For SDmode values we need to use DDmode, in
+ all other cases we can use the same mode. */
+enum machine_mode
+rs6000_secondary_memory_needed_mode (enum machine_mode mode)
+{
+ if (mode == SDmode)
+ return DDmode;
+ return mode;
+}
+
static tree
rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
{
@@ -16394,6 +16484,10 @@ rs6000_alloc_sdmode_stack_slot (void)
gimple_stmt_iterator gsi;
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+ /* We use a different approach for dealing with the secondary
+ memory in LRA. */
+ if (ira_use_lra_p)
+ return;
if (TARGET_NO_SDMODE_STACK)
return;
@@ -16615,7 +16709,7 @@ rs6000_secondary_reload_class (enum reg_
/* Constants, memory, and FP registers can go into FP registers. */
if ((regno == -1 || FP_REGNO_P (regno))
&& (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
- return (mode != SDmode) ? NO_REGS : GENERAL_REGS;
+ return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
/* Memory, and FP/altivec registers can go into fp/altivec registers under
VSX. However, for scalar variables, use the traditional floating point
@@ -30418,6 +30512,13 @@ rs6000_libcall_value (enum machine_mode
}
+/* Return true if we use LRA instead of reload pass. */
+static bool
+rs6000_lra_p (void)
+{
+ return rs6000_lra_flag;
+}
+
/* Given FROM and TO register numbers, say whether this elimination is allowed.
Frame pointer elimination is automatically handled.
===================================================================
@@ -1,6 +1,6 @@
; Options for the rs6000 port of the compiler
;
-; Copyright (C) 2005-2013 Free Software Foundation, Inc.
+; Copyright (C) 2005-2014 Free Software Foundation, Inc.
; Contributed by Aldy Hernandez <aldy@quesejoda.com>.
;
; This file is part of GCC.
@@ -454,6 +454,10 @@ mlong-double-
Target RejectNegative Joined UInteger Var(rs6000_long_double_type_size) Save
-mlong-double-<n> Specify size of long double (64 or 128 bits)
+mlra
+Target Report Var(rs6000_lra_flag) Init(0) Save
+Use LRA instead of reload
+
msched-costly-dep=
Target RejectNegative Joined Var(rs6000_sched_costly_dep_str)
Determine which dependences between insns are considered costly
===================================================================
@@ -1,5 +1,5 @@
;; Machine description for PowerPC synchronization instructions.
-;; Copyright (C) 2005-2013 Free Software Foundation, Inc.
+;; Copyright (C) 2005-2014 Free Software Foundation, Inc.
;; Contributed by Geoffrey Keating.
;; This file is part of GCC.
@@ -205,9 +205,12 @@
[(set_attr "type" "load_l")])
;; Use PTImode to get even/odd register pairs.
+
;; Use a temporary register to force getting an even register for the
-;; lqarx/stqcrx. instructions. Normal optimizations will eliminate this extra
-;; copy on big endian systems.
+;; lqarx/stqcrx. instructions. Under AT 7.0, we need use an explicit copy,
+;; even in big endian mode, unless we are using the LRA register allocator. In
+;; GCC 4.9, the register allocator is smart enough to assign a even/odd
+;; register pair.
;; On little endian systems where non-atomic quad word load/store instructions
;; are not used, the address can be register+offset, so make sure the address
@@ -230,12 +233,26 @@
}
emit_insn (gen_load_lockedpti (pti, op1));
- if (WORDS_BIG_ENDIAN)
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
emit_move_insn (op0, gen_lowpart (TImode, pti));
else
{
- emit_move_insn (gen_lowpart (DImode, op0), gen_highpart (DImode, pti));
- emit_move_insn (gen_highpart (DImode, op0), gen_lowpart (DImode, pti));
+ rtx op0_lo = gen_lowpart (DImode, op0);
+ rtx op0_hi = gen_highpart (DImode, op0);
+ rtx pti_lo = gen_lowpart (DImode, pti);
+ rtx pti_hi = gen_highpart (DImode, pti);
+
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (op0_hi, pti_hi);
+ emit_move_insn (op0_lo, pti_lo);
+ }
+ else
+ {
+ emit_move_insn (op0_hi, pti_lo);
+ emit_move_insn (op0_lo, pti_hi);
+ }
}
DONE;
})
@@ -260,8 +277,9 @@
[(set_attr "type" "store_c")])
;; Use a temporary register to force getting an even register for the
-;; lqarx/stqcrx. instructions. Normal optimizations will eliminate this extra
-;; copy on big endian systems.
+;; lqarx/stqcrx. instructions. Under AT 7.0, we need use an explicit copy,
+;; even in big endian mode. In GCC 4.9, the register allocator is smart enough
+;; to assign a even/odd register pair.
;; On little endian systems where non-atomic quad word load/store instructions
;; are not used, the address can be register+offset, so make sure the address
@@ -290,12 +308,26 @@
pti_mem = change_address (op1, PTImode, addr);
pti_reg = gen_reg_rtx (PTImode);
- if (WORDS_BIG_ENDIAN)
+ if (WORDS_BIG_ENDIAN && rs6000_lra_flag)
emit_move_insn (pti_reg, gen_lowpart (PTImode, op2));
else
{
- emit_move_insn (gen_lowpart (DImode, pti_reg), gen_highpart (DImode, op2));
- emit_move_insn (gen_highpart (DImode, pti_reg), gen_lowpart (DImode, op2));
+ rtx op2_lo = gen_lowpart (DImode, op2);
+ rtx op2_hi = gen_highpart (DImode, op2);
+ rtx pti_lo = gen_lowpart (DImode, pti_reg);
+ rtx pti_hi = gen_highpart (DImode, pti_reg);
+
+ emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
+ if (WORDS_BIG_ENDIAN)
+ {
+ emit_move_insn (pti_hi, op2_hi);
+ emit_move_insn (pti_lo, op2_lo);
+ }
+ else
+ {
+ emit_move_insn (pti_hi, op2_lo);
+ emit_move_insn (pti_lo, op2_hi);
+ }
}
emit_insn (gen_store_conditionalpti (op0, pti_mem, pti_reg));