===================================================================
@@ -126,7 +126,9 @@ (define_split
(match_operand:VEC_L 1 "input_operand" ""))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
===================================================================
@@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx);
extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
+extern const char *rs6000_output_move_128bit (rtx *);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
@@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx,
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
+extern bool direct_move_p (rtx, rtx);
+extern bool quad_load_store_p (rtx, rtx);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
===================================================================
@@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx,
don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
+/* Simplify register classes into simpler classifications.  We assume
+ GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
+ check for standard register classes (gpr/floating/altivec/vsx) and
+ floating/vector classes (float/altivec/vsx). */
+
+enum rs6000_reg_type {
+ NO_REG_TYPE,
+ PSEUDO_REG_TYPE,
+ GPR_REG_TYPE,
+ VSX_REG_TYPE,
+ ALTIVEC_REG_TYPE,
+ FPR_REG_TYPE,
+ SPR_REG_TYPE,
+ CR_REG_TYPE,
+ SPE_ACC_TYPE,
+ SPEFSCR_REG_TYPE
+};
+
+/* Map register class to register type. */
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
+
+/* First/last register type for the 'normal' register types (i.e. general
+ purpose, floating point, altivec, and VSX registers). */
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
+
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
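+
+/* Given the ordering of rs6000_reg_type above, IS_STD_REG_TYPE is true for
+   the GPR, VSX, Altivec, and FPR types, and IS_FP_VECT_REG_TYPE for the last
+   three; the SPR, CR, and SPE types fail both range checks.  */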
+
+/* Direct moves to/from vsx/gpr registers that need an additional register to
+ do the move. */
+static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES];
+static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES];
+static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES];
+
/* Target cpu costs. */
@@ -1042,6 +1075,13 @@ static void rs6000_print_isa_options (FI
static void rs6000_print_builtin_options (FILE *, int, const char *,
HOST_WIDE_INT);
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
+ enum rs6000_reg_type,
+ enum machine_mode,
+ secondary_reload_info *,
+ bool);
+
/* Hash table stuff for keeping track of TOC entries. */
struct GTY(()) toc_hash_struct
@@ -1587,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, en
return ALTIVEC_REGNO_P (last_regno);
}
- /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode
- can only go in GPRs. */
+ /* Allow TImode in all VSX registers if the user asked for it. */
if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno))
return 1;
@@ -2154,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool glo
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
+  /* Precalculate the mapping from register class to the simpler register
+     type used by reload.  We don't need all of the register classes that are
+     combinations of different classes, just the simple ones that have
+     constraint letters.  */
+ for (c = 0; c < N_REG_CLASSES; c++)
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
+
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
+
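+  /* Under VSX, the traditional FPRs and the Altivec registers are the two
+     halves of the VSX register file, so both classes collapse to
+     VSX_REG_TYPE and an FPR<->Altivec copy is a move within a single
+     register set.  */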
+ if (TARGET_VSX)
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
+ }
+ else
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
+ }
+
/* Precalculate vector information, this must be set up before the
rs6000_hard_regno_nregs_internal below. */
for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2305,7 +2374,15 @@ rs6000_init_hard_regno_mode_ok (bool glo
if (TARGET_LFIWZX)
rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
- /* Set up the reload helper functions. */
+  /* Set up the direct move combinations.  */
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ {
+ reload_fpr_gpr[m] = CODE_FOR_nothing;
+ reload_gpr_vsx[m] = CODE_FOR_nothing;
+ reload_vsx_gpr[m] = CODE_FOR_nothing;
+ }
+
+ /* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
if (TARGET_64BIT)
@@ -2329,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool glo
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store;
rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load;
}
+ if (TARGET_P8_VECTOR)
+ {
+ rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store;
+ rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load;
+ rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store;
+ rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load;
+ }
if (TARGET_VSX_TIMODE)
{
rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store;
rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load;
}
+ if (TARGET_DIRECT_MOVE)
+ {
+ if (TARGET_POWERPC64)
+ {
+ reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti;
+ reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df;
+ reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di;
+ reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf;
+ reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si;
+ reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi;
+ reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi;
+ reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf;
+
+ reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti;
+ reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df;
+ reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di;
+ reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf;
+ reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si;
+ reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi;
+ reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi;
+ reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf;
+ }
+ else
+ {
+ reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi;
+ reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd;
+ reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf;
+ }
+ }
}
else
{
@@ -2356,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool glo
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store;
rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load;
}
+ if (TARGET_P8_VECTOR)
+ {
+ rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store;
+ rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load;
+ rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store;
+ rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load;
+ }
if (TARGET_VSX_TIMODE)
{
rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store;
@@ -5405,6 +5525,72 @@ gpr_or_gpr_p (rtx op0, rtx op1)
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
+/* Return true if this is a direct move operation between GPR registers and
+ floating point/VSX registers. */
+
+bool
+direct_move_p (rtx op0, rtx op1)
+{
+ int regno0, regno1;
+
+ if (!REG_P (op0) || !REG_P (op1))
+ return false;
+
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
+ return false;
+
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
+ return false;
+
+ if (INT_REGNO_P (regno0))
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
+
+ else if (INT_REGNO_P (regno1))
+ {
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
+ return true;
+
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
+ return true;
+ }
+
+ return false;
+}
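+
+/* For example, on power8 (TARGET_DIRECT_MOVE) a (reg:DI 3)/(reg:DI 32) pair
+   (r3 and fr0) satisfies direct_move_p, while two GPRs do not; with just
+   -mmfpgpr only the GPR<->FPR pairings qualify.  */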
+
+/* Return true if this is a load or store quad operation. */
+
+bool
+quad_load_store_p (rtx op0, rtx op1)
+{
+ bool ret;
+
+ if (!TARGET_QUAD_MEMORY)
+ ret = false;
+
+ else if (REG_P (op0) && MEM_P (op1))
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
+ && quad_memory_operand (op1, GET_MODE (op1))
+ && !reg_overlap_mentioned_p (op0, op1));
+
+ else if (MEM_P (op0) && REG_P (op1))
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
+
+ else
+ ret = false;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
+ ret ? "true" : "false");
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
+ }
+
+ return ret;
+}
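+
+/* For example, (set (reg:TI 10) (mem:TI (reg:DI 3))) qualifies under
+   -mquad-memory, assuming quad_int_reg_operand enforces the even GPR
+   requirement of lq/stq; a load into an odd GPR, or from an address form
+   that quad_memory_operand rejects (such as reg+reg), does not.  */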
+
/* Given an address, return a constant offset term if one exists. */
static rtx
@@ -5912,8 +6098,11 @@ rs6000_legitimize_address (rtx x, rtx ol
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
return force_reg (Pmode, XEXP (x, 0));
+ /* For TImode with load/store quad, restrict addresses to just a single
+ pointer, so it works with both GPRs and VSX registers. */
/* Make sure both operands are registers. */
- else if (GET_CODE (x) == PLUS)
+ else if (GET_CODE (x) == PLUS
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
return gen_rtx_PLUS (Pmode,
force_reg (Pmode, XEXP (x, 0)),
force_reg (Pmode, XEXP (x, 1)));
@@ -6868,6 +7057,13 @@ rs6000_legitimate_address_p (enum machin
if (reg_offset_p
&& legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
return 1;
+ /* For TImode, if we have load/store quad, only allow register indirect
+ addresses. This will allow the values to go in either GPRs or VSX
+ registers without reloading. The vector types would tend to go into VSX
+ registers, so we allow REG+REG, while TImode seems somewhat split, in that
+ some uses are GPR based, and some VSX based. */
+ if (mode == TImode && TARGET_QUAD_MEMORY)
+ return 0;
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
if (! reg_ok_strict
&& reg_offset_p
@@ -14014,29 +14210,226 @@ rs6000_check_sdmode (tree *tp, int *walk
return NULL_TREE;
}
-enum reload_reg_type {
- GPR_REGISTER_TYPE,
- VECTOR_REGISTER_TYPE,
- OTHER_REGISTER_TYPE
-};
+/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
+ on traditional floating point registers, and the VMRGOW/VMRGEW instructions
+ only work on the traditional altivec registers, note if an altivec register
+   was chosen.  */
-static enum reload_reg_type
-rs6000_reload_register_type (enum reg_class rclass)
+static enum rs6000_reg_type
+register_to_reg_type (rtx reg, bool *is_altivec)
{
- switch (rclass)
+ HOST_WIDE_INT regno;
+ enum reg_class rclass;
+
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (!REG_P (reg))
+ return NO_REG_TYPE;
+
+ regno = REGNO (reg);
+ if (regno >= FIRST_PSEUDO_REGISTER)
{
- case GENERAL_REGS:
- case BASE_REGS:
- return GPR_REGISTER_TYPE;
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
+ return PSEUDO_REG_TYPE;
- case FLOAT_REGS:
- case ALTIVEC_REGS:
- case VSX_REGS:
- return VECTOR_REGISTER_TYPE;
+ regno = true_regnum (reg);
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ return PSEUDO_REG_TYPE;
+ }
- default:
- return OTHER_REGISTER_TYPE;
+ gcc_assert (regno >= 0);
+
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
+ *is_altivec = true;
+
+ rclass = rs6000_regno_regclass[regno];
+ return reg_class_to_reg_type[(int)rclass];
+}
+
+/* Helper function for rs6000_secondary_reload to return true if a move to a
+   different register class is really a simple move.  */
+
+static bool
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode)
+{
+ int size;
+
+ /* Add support for various direct moves available. In this function, we only
+ look at cases where we don't need any extra registers, and one or more
+ simple move insns are issued. At present, 32-bit integers are not allowed
+     in FPR/VSX registers.  Single precision binary floating point is not a
+     simple move because we need to convert to the single precision memory
+     layout.  The 4-byte SDmode can be moved.  */
+ size = GET_MODE_SIZE (mode);
+ if (TARGET_DIRECT_MOVE
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ return false;
+}
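+
+/* For example, a DImode move between a GPR and a VSX register on a 64-bit
+   direct-move target is a simple move (a single mtvsrd/mfvsrd), as is an
+   SDmode move; SFmode falls through to the reload patterns because it needs
+   the single precision format conversion.  */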
+
+/* Power8 helper function for rs6000_secondary_reload.  Handle all of the
+   special direct moves that involve allocating an extra register; return
+   true if a helper pattern exists, filling in SRI->icode and
+   SRI->extra_cost, and false if not.  */
+
+static bool
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ bool ret = false;
+ enum insn_code icode = CODE_FOR_nothing;
+ int cost = 0;
+ int size = GET_MODE_SIZE (mode);
+
+ if (TARGET_POWERPC64)
+ {
+ if (size == 16)
+ {
+	  /* Handle moving 128-bit values from GPRs to VSX registers on
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
+ 64-bit values back together. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+
+	  /* Handle moving 128-bit values from VSX registers to GPRs on
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
+ bottom 64-bit value. */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+ }
+
+ else if (mode == SFmode)
+ {
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+	      cost = 3;  /* xscvdpspn, mfvsrd, srdi.  */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+	      cost = 2;  /* mtvsrd, xscvspdpn.  */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+ }
+ }
+
+ else if (!TARGET_POWERPC64 && size == 8)
+ {
+ /* Handle moving 64-bit values from GPRs to floating point registers on
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
+ values back together. Altivec register classes must be handled
+ specially since a different instruction is used, and the secondary
+ reload support requires a single instruction class in the scratch
+ register constraint. However, right now TFmode is not allowed in
+ Altivec registers, so the pattern will never match. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
+ {
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
+ icode = reload_fpr_gpr[(int)mode];
+ }
}
+
+ if (icode != CODE_FOR_nothing)
+ {
+ ret = true;
+ if (sri)
+ {
+ sri->icode = icode;
+ sri->extra_cost = cost;
+ }
+ }
+
+ return ret;
+}
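+
+/* For example, a TImode move from a GPR pair to a VSX register on a 64-bit
+   power8 comes back with sri->icode = CODE_FOR_reload_vsx_from_gprti and
+   sri->extra_cost = 3 (two mtvsrd's plus an xxpermdi).  */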
+
+/* Return whether a move between two register classes can be done either
+ directly (simple move) or via a pattern that uses a single extra temporary
+   (using power8's direct move in this case).  */
+
+static bool
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ /* Fall back to load/store reloads if either type is not a register. */
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
+ return false;
+
+ /* If we haven't allocated registers yet, assume the move can be done for the
+ standard register types. */
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
+ return true;
+
+  /* A move within the same set of registers is a simple move for
+     non-specialized registers.  */
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
+ return true;
+
+ /* Check whether a simple move can be done directly. */
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
+ {
+ if (sri)
+ {
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+ }
+ return true;
+ }
+
+ /* Now check if we can do it in a few steps. */
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
+ altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
@@ -14062,11 +14455,32 @@ rs6000_secondary_reload (bool in_p,
bool default_p = false;
sri->icode = CODE_FOR_nothing;
-
- /* Convert vector loads and stores into gprs to use an additional base
- register. */
icode = rs6000_vector_reload[mode][in_p != false];
- if (icode != CODE_FOR_nothing)
+
+ if (REG_P (x) || register_operand (x, mode))
+ {
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
+ bool altivec_p = (rclass == ALTIVEC_REGS);
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
+
+ if (!in_p)
+ {
+ enum rs6000_reg_type exchange = to_type;
+ to_type = from_type;
+ from_type = exchange;
+ }
+
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
+ altivec_p))
+ {
+ icode = (enum insn_code)sri->icode;
+ default_p = false;
+ ret = NO_REGS;
+ }
+ }
+
+ /* Handle vector moves with reload helper functions. */
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
{
ret = NO_REGS;
sri->icode = CODE_FOR_nothing;
@@ -14078,12 +14492,21 @@ rs6000_secondary_reload (bool in_p,
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
an extra register in that case, but it would need an extra
- register if the addressing is reg+reg or (reg+reg)&(-16). */
+ register if the addressing is reg+reg or (reg+reg)&(-16). Special
+ case load/store quad. */
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
{
- if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (PTImode, addr,
- false, true))
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
+ && GET_MODE_SIZE (mode) == 16
+ && quad_memory_operand (x, mode))
+ {
+ sri->icode = icode;
+ sri->extra_cost = 2;
+ }
+
+ else if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
+ false, true))
{
sri->icode = icode;
/* account for splitting the loads, and converting the
@@ -14097,7 +14520,7 @@ rs6000_secondary_reload (bool in_p,
else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
&& (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
&& (legitimate_indirect_address_p (addr, false)
- || legitimate_indirect_address_p (XEXP (addr, 0), false)
|| rs6000_legitimate_offset_address_p (mode, addr,
false, true)))
@@ -14149,12 +14572,12 @@ rs6000_secondary_reload (bool in_p,
else
{
enum reg_class xclass = REGNO_REG_CLASS (regno);
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
/* If memory is needed, use default_secondary_reload to create the
stack slot. */
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
default_p = true;
else
ret = NO_REGS;
@@ -14164,7 +14587,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
{
@@ -14203,7 +14626,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (!TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
{
@@ -14766,42 +15189,25 @@ rs6000_debug_preferred_reload_class (rtx
set and vice versa. */
static bool
-rs6000_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- if (class1 == class2)
- return false;
-
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
- between these classes. But we need memory for other things that can go in
- FLOAT_REGS like SFmode. */
- if (TARGET_VSX
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
- || class1 == FLOAT_REGS))
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
- && class2 != FLOAT_REGS);
-
- if (class1 == VSX_REGS || class2 == VSX_REGS)
- return true;
-
- if (class1 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ enum rs6000_reg_type from_type, to_type;
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
+ || (to_class == ALTIVEC_REGS));
+
+  /* If a simple/direct move is available, we don't need secondary memory.  */
+ from_type = reg_class_to_reg_type[(int)from_class];
+ to_type = reg_class_to_reg_type[(int)to_class];
- if (class2 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
+ (secondary_reload_info *)0, altivec_p))
+ return false;
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ /* If we have a floating point or vector register class, we need to use
+ memory to transfer the data. */
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
return true;
return false;
@@ -14809,17 +15215,19 @@ rs6000_secondary_memory_needed (enum reg
/* Debug version of rs6000_secondary_memory_needed. */
static bool
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
fprintf (stderr,
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
- "class2 = %s, mode = %s\n",
- ret ? "true" : "false", reg_class_names[class1],
- reg_class_names[class2], GET_MODE_NAME (mode));
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
+ "to_class = %s, mode = %s\n",
+ ret ? "true" : "false",
+ reg_class_names[from_class],
+ reg_class_names[to_class],
+ GET_MODE_NAME (mode));
return ret;
}
@@ -15025,6 +15433,170 @@ rs6000_debug_cannot_change_mode_class (e
return ret;
}
+/* Return a string to do a move operation of 128 bits of data. */
+
+const char *
+rs6000_output_move_128bit (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ enum machine_mode mode = GET_MODE (dest);
+ int dest_regno;
+ int src_regno;
+ bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p;
+ bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p;
+
+ if (REG_P (dest))
+ {
+ dest_regno = REGNO (dest);
+ dest_gpr_p = INT_REGNO_P (dest_regno);
+ dest_fp_p = FP_REGNO_P (dest_regno);
+ dest_av_p = ALTIVEC_REGNO_P (dest_regno);
+ dest_vsx_p = dest_fp_p | dest_av_p;
+ }
+ else
+ {
+ dest_regno = -1;
+ dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false;
+ }
+
+ if (REG_P (src))
+ {
+ src_regno = REGNO (src);
+ src_gpr_p = INT_REGNO_P (src_regno);
+ src_fp_p = FP_REGNO_P (src_regno);
+ src_av_p = ALTIVEC_REGNO_P (src_regno);
+ src_vsx_p = src_fp_p | src_av_p;
+ }
+ else
+ {
+ src_regno = -1;
+ src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false;
+ }
+
+ /* Register moves. */
+ if (dest_regno >= 0 && src_regno >= 0)
+ {
+ if (dest_gpr_p)
+ {
+ if (src_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
+ return "#";
+ }
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (src_vsx_p)
+ return "xxlor %x0,%x1,%x1";
+
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p && src_av_p)
+ return "vor %0,%1,%1";
+
+ else if (dest_fp_p && src_fp_p)
+ return "#";
+ }
+
+ /* Loads. */
+ else if (dest_regno >= 0 && MEM_P (src))
+ {
+ if (dest_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
+ && quad_memory_operand (src, mode)
+ && !reg_overlap_mentioned_p (dest, src))
+ {
+ /* lq/stq only has DQ-form, so avoid X-form that %y produces. */
+ return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
+ }
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "lvx %0,%y1";
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "lxvw4x %x0,%y1";
+ else
+ return "lxvd2x %x0,%y1";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p)
+ return "lvx %0,%y1";
+
+ else if (dest_fp_p)
+ return "#";
+ }
+
+ /* Stores. */
+ else if (src_regno >= 0 && MEM_P (dest))
+ {
+ if (src_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
+ && quad_memory_operand (dest, mode))
+ {
+ /* lq/stq only has DQ-form, so avoid X-form that %y produces. */
+ return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
+ }
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && src_av_p
+	   && altivec_indexed_or_indirect_operand (dest, mode))
+ return "stvx %1,%y0";
+
+ else if (TARGET_VSX && src_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "stxvw4x %x1,%y0";
+ else
+ return "stxvd2x %x1,%y0";
+ }
+
+ else if (TARGET_ALTIVEC && src_av_p)
+ return "stvx %1,%y0";
+
+ else if (src_fp_p)
+ return "#";
+ }
+
+ /* Constants. */
+ else if (dest_regno >= 0
+ && (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE
+ || GET_CODE (src) == CONST_VECTOR))
+ {
+ if (dest_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
+ return "xxlxor %x0,%x0,%x0";
+
+ else if (TARGET_ALTIVEC && dest_av_p)
+ return output_vec_const_move (operands);
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
+ }
+
+ gcc_unreachable ();
+}
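+
+/* For example (illustrative, not exhaustive): a VSX-to-VSX register copy
+   returns "xxlor %x0,%x1,%x1"; an even-GPR load from a quad_memory_operand
+   under -mquad-memory returns an "lq" string; GPR<->GPR and direct-move
+   register cases return "#" so that the insn is split after reload.  */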
+
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
===================================================================
@@ -217,112 +217,31 @@ (define_c_enum "unspec"
;; VSX moves
(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
"VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{
- switch (which_alternative)
- {
- case 0:
- case 3:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stx<VSm>x %x1,%y0";
-
- case 1:
- case 4:
- gcc_assert (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
- return "lx<VSm>x %x0,%y1";
-
- case 2:
- case 5:
- return "xxlor %x0,%x1,%x1";
-
- case 6:
- case 7:
- case 8:
- case 11:
- return "#";
-
- case 9:
- case 10:
- return "xxlxor %x0,%x0,%x0";
-
- case 12:
- return output_vec_const_move (operands);
-
- case 13:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stvx %1,%y0";
-
- case 14:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "lvx %0,%y1";
-
- default:
- gcc_unreachable ();
- }
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
;; use of TImode is for unions. However for plain data movement, slightly
;; favor the vector loads
(define_insn "*vsx_movti_64bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r")
- (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
&& (register_operand (operands[0], TImode)
|| register_operand (operands[1], TImode))"
{
- switch (which_alternative)
- {
- case 0:
- return "stxvd2x %x1,%y0";
-
- case 1:
- return "lxvd2x %x0,%y1";
-
- case 2:
- return "xxlor %x0,%x1,%x1";
-
- case 3:
- return "xxlxor %x0,%x0,%x0";
-
- case 4:
- return output_vec_const_move (operands);
-
- case 5:
- return "stvx %1,%y0";
-
- case 6:
- return "lvx %0,%y1";
-
- case 7:
- case 8:
- case 9:
- case 10:
- return "#";
-
- default:
- gcc_unreachable ();
- }
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*")
- (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
(define_insn "*vsx_movti_32bit"
[(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
===================================================================
@@ -127,6 +127,13 @@ (define_c_enum "unspec"
UNSPEC_LFIWZX
UNSPEC_FCTIWUZ
UNSPEC_GRP_END_NOP
+ UNSPEC_P8V_FMRGOW
+ UNSPEC_P8V_MTVSRWZ
+ UNSPEC_P8V_RELOAD_FROM_GPR
+ UNSPEC_P8V_MTVSRD
+ UNSPEC_P8V_XXPERMDI
+ UNSPEC_P8V_RELOAD_FROM_VSX
+ UNSPEC_FUSION_GPR
])
;;
@@ -268,6 +275,15 @@ (define_mode_iterator FMOVE64X [DI DF DD
(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+; Iterators for 128 bit types for direct move
+(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
+ (V16QI "")
+ (V8HI "")
+ (V4SI "")
+ (V4SF "")
+ (V2DI "")
+ (V2DF "")])
+
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
(DF "")
@@ -284,11 +300,16 @@ (define_mode_attr real_value_to_target [
(define_mode_attr f32_lr [(SF "f") (SD "wz")])
(define_mode_attr f32_lm [(SF "m") (SD "Z")])
(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")])
; Definitions for store from 32-bit fpr register
(define_mode_attr f32_sr [(SF "f") (SD "wx")])
(define_mode_attr f32_sm [(SF "m") (SD "Z")])
(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")])
+
+; Definitions for 32-bit fpr direct move
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
@@ -368,7 +389,7 @@ (define_expand "zero_extend<mode>di2"
(define_insn "*zero_extend<mode>di2_internal1"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
"@
l<wd>z%U1%X1 %0,%1
rldicl %0,%1,0,<dbits>"
@@ -434,6 +455,29 @@ (define_split
(const_int 0)))]
"")
+(define_insn "*zero_extendsidi2_lfiwzx"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm")
+ (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWZX"
+ "@
+ lwz%U1%X1 %0,%1
+ rldicl %0,%1,0,32
+ mtvsrwz %x0,%1
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
(define_insn "extendqidi2"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
@@ -581,10 +625,33 @@ (define_expand "extendsidi2"
"TARGET_POWERPC64"
"")
-(define_insn ""
+(define_insn "*extendsidi2_lfiwax"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm")
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWAX"
+ "@
+ lwa%U1%X1 %0,%1
+ extsw %0,%1
+ mtvsrwa %x0,%1
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
- "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
"@
lwa%U1%X1 %0,%1
extsw %0,%1"
@@ -598,7 +665,7 @@ (define_insn ""
(const_string "load_ext")))
(const_string "exts")])])
-(define_insn ""
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
"TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
@@ -2035,7 +2102,9 @@ (define_insn "parity<mode>2_cmpb"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
"TARGET_CMPB && TARGET_POPCNTB"
- "prty<wd> %0,%1")
+ "prty<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "popcnt")])
(define_expand "parity<mode>2"
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
@@ -4316,7 +4385,7 @@ (define_insn ""
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -4348,7 +4417,7 @@ (define_insn ""
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -5553,12 +5622,15 @@ (define_insn "*fselsfdf4"
; We don't define lfiwax/lfiwzx with the normal definition, because we
; don't want to support putting SImode in FPR registers.
(define_insn "lfiwax"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWAX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
- "lfiwax %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1
+ mtvsrwa %x0,%1"
+ [(set_attr "type" "fpload,fpload,mffgpr")])
; This split must be run before register allocation because it allocates the
; memory slot that is needed to move values to/from the FPR. We don't allocate
@@ -5580,7 +5652,8 @@ (define_insn_and_split "floatsi<mode>2_l
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, false);
else
{
@@ -5629,12 +5702,15 @@ (define_insn_and_split "floatsi<mode>2_l
(set_attr "type" "fpload")])
(define_insn "lfiwzx"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWZX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
- "lfiwzx %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1
+ mtvsrwz %x0,%1"
+ [(set_attr "type" "fpload,fpload,mftgpr")])
(define_insn_and_split "floatunssi<mode>2_lfiwzx"
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
@@ -5651,7 +5727,8 @@ (define_insn_and_split "floatunssi<mode>
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, true);
else
{
@@ -5942,7 +6019,7 @@ (define_insn_and_split "fix_trunc<mode>s
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -6036,7 +6113,7 @@ (define_insn_and_split "fixuns_trunc<mod
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -8490,7 +8567,7 @@ (define_insn "*mov<mode>_internal2"
cmp<wd>i %2,%0,0
mr. %0,%1
#"
- [(set_attr "type" "cmp,compare,cmp")
+ [(set_attr "type" "cmp,fast_compare,cmp")
(set_attr "length" "4,4,8")])
(define_split
@@ -8680,8 +8757,8 @@ (define_split
}")
(define_insn "mov<mode>_hardfloat"
- [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r")
- (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))]
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -8694,6 +8771,10 @@ (define_insn "mov<mode>_hardfloat"
xxlxor %x0,%x0,%x0
<f32_li>
<f32_si>
+ <f32_lv>
+ <f32_sv>
+ mtvsrwz %x0,%1
+ mfvsrwz %0,%x1
mt%0 %1
mf%1 %0
nop
@@ -8732,16 +8813,20 @@ (define_insn "mov<mode>_hardfloat"
(match_test "update_address_mem (operands[0], VOIDmode)")
(const_string "fpstore_u")
(const_string "fpstore")))
+ (const_string "fpload")
+ (const_string "fpstore")
+ (const_string "mftgpr")
+ (const_string "mffgpr")
(const_string "mtjmpr")
(const_string "mfjmpr")
(const_string "*")
(const_string "*")
(const_string "*")])
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
(define_insn "*mov<mode>_softfloat"
[(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
- (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))]
+ (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
@@ -8954,8 +9039,8 @@ (define_insn "*mov<mode>_softfloat32"
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
(define_insn "*mov<mode>_hardfloat64"
- [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg")
- (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))]
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8980,7 +9065,9 @@ (define_insn "*mov<mode>_hardfloat64"
#
#
mftgpr %0,%1
- mffgpr %0,%1"
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
[(set_attr_alternative "type"
[(if_then_else
(match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9038,8 +9125,10 @@ (define_insn "*mov<mode>_hardfloat64"
(const_string "*")
(const_string "*")
(const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
(const_string "mffgpr")])
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
(define_insn "*mov<mode>_softfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
@@ -9419,6 +9508,216 @@ (define_expand "reload_<mode>_load"
})
+;; Power8 merge instructions to allow direct move to/from floating point
+;; registers in 32-bit mode. We use TF mode to get two registers to move the
+;; individual 32-bit parts across. Subreg doesn't work too well on the TF
+;; value, since it is allocated in reload and not all of the flow information
+;; is set up for it.  We have two patterns to do the two moves between gprs
+;; and fprs.  There isn't a dependency between the two, but we could
+;; potentially schedule other instructions between the two instructions.
+;; TFmode is currently limited to traditional FPR registers.  If/when this is
+;; changed, we will need to revisit %L to make sure it works with VSX
+;; registers, or add an
+;; %x version of %L.
+
+(define_insn "p8_fmrgow_<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
+ (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
+ UNSPEC_P8V_FMRGOW))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "fmrgow %0,%1,%L1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_mtvsrwz_1"
+ [(set (match_operand:TF 0 "register_operand" "=d")
+ (unspec:TF [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %x0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrwz_2"
+ [(set (match_operand:TF 0 "register_operand" "+d")
+ (unspec:TF [(match_dup 0)
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
+ (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=d"))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (SImode, src);
+ rtx gpr_lo_reg = gen_lowpart (SImode, src);
+
+ emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move 128 bit values from GPRs to VSX registers in 64-bit mode
+(define_insn "p8_mtvsrd_1"
+ [(set (match_operand:TF 0 "register_operand" "=ws")
+ (unspec:TF [(match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_2"
+ [(set (match_operand:TF 0 "register_operand" "+ws")
+ (unspec:TF [(match_dup 0)
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_xxpermdi_<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
+ UNSPEC_P8V_XXPERMDI))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "xxpermdi %x0,%1,%L1,0"
+ [(set_attr "type" "vecperm")])
+
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=ws"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DImode, src);
+ rtx gpr_lo_reg = gen_lowpart (DImode, src);
+
+ emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
+  emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
+  DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a VSX register from a GPR.  Because scalar floating point
+;; is stored internally as double precision in the VSX registers, we have to
+;; convert it from the single precision memory format with xscvspdpn.
+
+(define_insn_and_split "reload_vsx_from_gprsf"
+ [(set (match_operand:SF 0 "register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:DI 2 "register_operand" "=r"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
+ rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
+
+ /* Move SF value to upper 32-bits for xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_move_insn (op0_di, op2);
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "two")])
+
+;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a
+;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value,
+;; and then doing a move of that.
+(define_insn "p8_mfvsrd_3_<mode>"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DFmode, dest);
+ rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
+
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
+ emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+  emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
+  DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a GPR from a VSX register.  Because scalar floating point
+;; is stored internally as double precision, we have to convert it back to
+;; the single precision memory format with xscvdpspn first.
+
+(define_insn_and_split "reload_gpr_from_vsxsf"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
+
+ emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
+ emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
+ emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+(define_insn "p8_mfvsrd_4_disf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+
;; Next come the multi-word integer load and store and the load and store
;; multiple insns.
@@ -9467,7 +9766,8 @@ (define_split
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(match_operand:DI 1 "const_int_operand" ""))]
"! TARGET_POWERPC64 && reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 1))]
"
@@ -9485,13 +9785,14 @@ (define_split
[(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
(match_operand:DIFD 1 "input_operand" ""))]
"reload_completed && !TARGET_POWERPC64
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
(define_insn "*movdi_internal64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg")
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm")
+ (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))]
"TARGET_POWERPC64
&& (gpc_reg_operand (operands[0], DImode)
|| gpc_reg_operand (operands[1], DImode))"
@@ -9513,7 +9814,9 @@ (define_insn "*movdi_internal64"
nop
xxlxor %x0,%x0,%x0
mftgpr %0,%1
- mffgpr %0,%1"
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
[(set_attr_alternative "type"
[(if_then_else
(match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9562,8 +9865,10 @@ (define_insn "*movdi_internal64"
(const_string "*")
(const_string "vecsimple")
(const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
(const_string "mffgpr")])
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")])
;; Generate all one-bits and clear left or right.
;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber.
@@ -9652,19 +9957,20 @@ (define_insn "*mov<mode>_string"
(const_string "conditional")))])
(define_insn "*mov<mode>_ppc64"
- [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r")
- (match_operand:TI2 1 "input_operand" "r,Y,r"))]
- "(TARGET_POWERPC64
- && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
+ (match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+ "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode)))"
"#"
- [(set_attr "type" "store,load,*")])
+ [(set_attr "type" "store,load,*,*")])
(define_split
- [(set (match_operand:TI2 0 "gpc_reg_operand" "")
+ [(set (match_operand:TI2 0 "int_reg_operand" "")
(match_operand:TI2 1 "const_double_operand" ""))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64
+ && (VECTOR_MEM_NONE_P (<MODE>mode)
+ || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
"
@@ -9691,7 +9997,9 @@ (define_split
[(set (match_operand:TI2 0 "nonimmediate_operand" "")
(match_operand:TI2 1 "input_operand" ""))]
"reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
===================================================================
@@ -0,0 +1,13 @@
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "mtvsrd" 4 } } */
+/* { dg-final { scan-assembler-times "mfvsrd" 4 } } */
+
+/* Check code generation for direct move for vector types.  */
+
+#define TYPE vector int
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,12 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Check whether we get the right bits for direct move at runtime. */
+
+#define TYPE vector int
+#define DO_MAIN
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,183 @@
+/* Test functions for direct move support. */
+
+#include <stddef.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+
+void __attribute__((__noinline__))
+copy (TYPE *a, TYPE *b)
+{
+ *b = *a;
+}
+
+#ifndef NO_GPR
+void __attribute__((__noinline__))
+load_gpr (TYPE *a, TYPE *b)
+{
+ TYPE c = *a;
+ __asm__ ("# gpr, reg = %0" : "+b" (c));
+ *b = c;
+}
+#endif
+
+#ifndef NO_FPR
+void __attribute__((__noinline__))
+load_fpr (TYPE *a, TYPE *b)
+{
+ TYPE c = *a;
+ __asm__ ("# fpr, reg = %0" : "+d" (c));
+ *b = c;
+}
+#endif
+
+#ifndef NO_ALTIVEC
+void __attribute__((__noinline__))
+load_altivec (TYPE *a, TYPE *b)
+{
+ TYPE c = *a;
+ __asm__ ("# altivec, reg = %0" : "+v" (c));
+ *b = c;
+}
+#endif
+
+#ifndef NO_VSX
+void __attribute__((__noinline__))
+load_vsx (TYPE *a, TYPE *b)
+{
+ TYPE c = *a;
+ __asm__ ("# vsx, reg = %x0" : "+wa" (c));
+ *b = c;
+}
+#endif
+
+#ifndef NO_GPR_TO_VSX
+void __attribute__((__noinline__))
+load_gpr_to_vsx (TYPE *a, TYPE *b)
+{
+ TYPE c = *a;
+ TYPE d;
+ __asm__ ("# gpr, reg = %0" : "+b" (c));
+ d = c;
+ __asm__ ("# vsx, reg = %x0" : "+wa" (d));
+ *b = d;
+}
+#endif
+
+#ifndef NO_VSX_TO_GPR
+void __attribute__((__noinline__))
+load_vsx_to_gpr (TYPE *a, TYPE *b)
+{
+ TYPE c = *a;
+ TYPE d;
+ __asm__ ("# vsx, reg = %x0" : "+wa" (c));
+ d = c;
+ __asm__ ("# gpr, reg = %0" : "+b" (d));
+ *b = d;
+}
+#endif
+
+#ifdef DO_MAIN
+typedef void (fn_type (TYPE *, TYPE *));
+
+struct test_struct {
+ fn_type *func;
+ const char *name;
+};
+
+const struct test_struct test_functions[] = {
+ { copy, "copy" },
+#ifndef NO_GPR
+ { load_gpr, "load_gpr" },
+#endif
+#ifndef NO_FPR
+ { load_fpr, "load_fpr" },
+#endif
+#ifndef NO_ALTIVEC
+ { load_altivec, "load_altivec" },
+#endif
+#ifndef NO_VSX
+ { load_vsx, "load_vsx" },
+#endif
+#ifndef NO_GPR_TO_VSX
+ { load_gpr_to_vsx, "load_gpr_to_vsx" },
+#endif
+#ifndef NO_VSX_TO_GPR
+ { load_vsx_to_gpr, "load_vsx_to_gpr" },
+#endif
+};
+
+/* Test a given value for each of the functions. */
+void __attribute__((__noinline__))
+test_value (TYPE a)
+{
+ size_t i;
+
+ for (i = 0; i < sizeof (test_functions) / sizeof (test_functions[0]); i++)
+ {
+ TYPE b;
+
+ test_functions[i].func (&a, &b);
+ if (memcmp ((void *)&a, (void *)&b, sizeof (TYPE)) != 0)
+ abort ();
+ }
+}
+
+/* Main program. */
+int
+main (void)
+{
+ size_t i;
+ long j;
+ union {
+ TYPE value;
+ unsigned char bytes[sizeof (TYPE)];
+ } u;
+
+#if IS_INT
+ TYPE value = (TYPE)-5;
+ for (i = 0; i < 12; i++)
+ {
+ test_value (value);
+ value++;
+ }
+
+ for (i = 0; i < 8*sizeof (TYPE); i++)
+ test_value (((TYPE)1) << i);
+
+#elif IS_UNS
+ TYPE value = (TYPE)0;
+ for (i = 0; i < 10; i++)
+ {
+ test_value (value);
+ test_value (~ value);
+ value++;
+ }
+
+ for (i = 0; i < 8*sizeof (TYPE); i++)
+ test_value (((TYPE)1) << i);
+
+#elif IS_FLOAT
+ TYPE value = (TYPE)-5;
+ for (i = 0; i < 12; i++)
+ {
+ test_value (value);
+ value++;
+ }
+
+ test_value ((TYPE)3.1415926535);
+ test_value ((TYPE)1.23456);
+ test_value ((TYPE)(-0.0));
+ test_value ((TYPE)NAN);
+ test_value ((TYPE)+INFINITY);
+ test_value ((TYPE)-INFINITY);
+#else
+
+ for (j = 0; j < 10; j++)
+ {
+ for (i = 0; i < sizeof (TYPE); i++)
+ u.bytes[i] = (unsigned char) (random () >> 4);
+
+ test_value (u.value);
+ }
+#endif
+
+ return 0;
+}
+#endif
===================================================================
@@ -0,0 +1,17 @@
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "mtvsrd" 2 } } */
+/* { dg-final { scan-assembler-times "mfvsrd" 2 } } */
+/* { dg-final { scan-assembler-times "xscvdpspn" 2 } } */
+/* { dg-final { scan-assembler-times "xscvspdpn" 2 } } */
+
+/* Check code generation for direct move for float types.  */
+
+#define TYPE float
+#define IS_FLOAT 1
+#define NO_ALTIVEC 1
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Check whether we get the right bits for direct move at runtime. */
+
+#define TYPE float
+#define IS_FLOAT 1
+#define NO_ALTIVEC 1
+#define DO_MAIN
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "mtvsrd" 1 } } */
+/* { dg-final { scan-assembler-times "mfvsrd" 1 } } */
+
+/* Check code generation for direct move for double types.  */
+
+#define TYPE double
+#define IS_FLOAT 1
+#define NO_ALTIVEC 1
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,15 @@
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mcpu=power8 -O2" } */
+/* { dg-final { scan-assembler-times "mtvsrd" 1 } } */
+/* { dg-final { scan-assembler-times "mfvsrd" 2 } } */
+
+/* Check code generation for direct move for long types. */
+
+#define TYPE long
+#define IS_INT 1
+#define NO_ALTIVEC 1
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Check whether we get the right bits for direct move at runtime. */
+
+#define TYPE double
+#define IS_FLOAT 1
+#define NO_ALTIVEC 1
+#define DO_MAIN
+
+#include "direct-move.h"
===================================================================
@@ -0,0 +1,14 @@
+/* { dg-do run { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target p8vector_hw } */
+/* { dg-options "-mcpu=power8 -O2" } */
+
+/* Check whether we get the right bits for direct move at runtime. */
+
+#define TYPE long
+#define IS_INT 1
+#define NO_ALTIVEC 1
+#define DO_MAIN
+
+#include "direct-move.h"