===================================================================
@@ -54,7 +54,7 @@ (define_mode_iterator VEC_E [V16QI V8HI
(define_mode_iterator VEC_64 [V2DI V2DF])
;; Vector reload iterator
-(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF DF TI])
+(define_mode_iterator VEC_R [V16QI V8HI V4SI V2DI V4SF V2DF SF SD SI DF DD DI TI])
;; Base type from vector mode
(define_mode_attr VEC_base [(V16QI "QI")
===================================================================
@@ -68,6 +68,16 @@ (define_register_constraint "ws" "rs6000
(define_register_constraint "wa" "rs6000_constraints[RS6000_CONSTRAINT_wa]"
"@internal")
+;; Register constraints to simplify move patterns
+(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
+ "Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
+
+(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
+ "Floating point register if the STFIWX instruction is enabled or NO_REGS.")
+
+(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
+ "Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
+
;; Altivec style load/store that ignores the bottom bits of the address
(define_memory_constraint "wZ"
"Indexed or indirect memory operand, ignoring the bottom 4 bits"
===================================================================
@@ -1737,14 +1737,21 @@ rs6000_debug_reg_global (void)
"wa reg_class = %s\n"
"wd reg_class = %s\n"
"wf reg_class = %s\n"
- "ws reg_class = %s\n\n",
+ "wl reg_class = %s\n"
+ "ws reg_class = %s\n"
+ "wx reg_class = %s\n"
+ "wz reg_class = %s\n"
+ "\n",
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
- reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]]);
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
for (m = 0; m < NUM_MACHINE_MODES; ++m)
if (rs6000_vector_unit[m] || rs6000_vector_mem[m])
@@ -2108,9 +2115,20 @@ rs6000_init_hard_regno_mode_ok (bool glo
: FLOAT_REGS);
}
+ /* Add conditional constraints based on various options, to allow us to
+ collapse multiple insn patterns. */
if (TARGET_ALTIVEC)
rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
+ if (TARGET_LFIWAX)
+ rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
+
+ if (TARGET_STFIWX)
+ rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
+
+ if (TARGET_LFIWZX)
+ rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
+
/* Set up the reload helper functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
@@ -5176,6 +5194,13 @@ reg_offset_addressing_ok_p (enum machine
return false;
break;
+ case SDmode:
+ /* If we can do direct load/stores of SDmode, restrict it to reg+reg
+ addressing for the LFIWZX and STFIWX instructions. */
+ if (TARGET_NO_SDMODE_STACK)
+ return false;
+ break;
+
default:
break;
}
@@ -7143,6 +7168,7 @@ rs6000_emit_move (rtx dest, rtx source,
if (reload_in_progress
&& mode == SDmode
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
&& MEM_P (operands[0])
&& rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
&& REG_P (operands[1]))
@@ -7167,6 +7193,7 @@ rs6000_emit_move (rtx dest, rtx source,
&& mode == SDmode
&& REG_P (operands[0])
&& MEM_P (operands[1])
+ && cfun->machine->sdmode_stack_slot != NULL_RTX
&& rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
{
if (FP_REGNO_P (REGNO (operands[0])))
@@ -13622,7 +13649,7 @@ rs6000_secondary_memory_needed_rtx (enum
static bool eliminated = false;
rtx ret;
- if (mode != SDmode)
+ if (mode != SDmode || TARGET_NO_SDMODE_STACK)
ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
else
{
@@ -14209,8 +14236,10 @@ rs6000_secondary_reload_gpr (rtx reg, rt
return;
}
-/* Allocate a 64-bit stack slot to be used for copying SDmode
- values through if this function has any SDmode references. */
+/* Allocate a 64-bit stack slot to be used for copying SDmode values through if
+ this function has any SDmode references. If we are on a power7 or later, we
+ don't need the 64-bit stack slot since the LFIWZX and STIFWX instructions
+ can load/store the value. */
static void
rs6000_alloc_sdmode_stack_slot (void)
@@ -14221,6 +14250,9 @@ rs6000_alloc_sdmode_stack_slot (void)
gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
+ if (TARGET_NO_SDMODE_STACK)
+ return;
+
FOR_EACH_BB (bb)
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
{
@@ -14281,8 +14313,7 @@ rs6000_preferred_reload_class (rtx x, en
{
enum machine_mode mode = GET_MODE (x);
- if (VECTOR_UNIT_VSX_P (mode)
- && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
+ if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
return rclass;
if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
===================================================================
@@ -473,6 +473,11 @@ extern int rs6000_vector_align[];
#define TARGET_FCTIDUZ TARGET_POPCNTD
#define TARGET_FCTIWUZ TARGET_POPCNTD
+/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
+ to allocate the SDmode stack slot to get the value into the proper location
+ in the register. */
+#define TARGET_NO_SDMODE_STACK (TARGET_LFIWZX && TARGET_STFIWX && TARGET_DFP)
+
/* In switching from using target_flags to using rs6000_isa_flags, the options
machinery creates OPTION_MASK_<xxx> instead of MASK_<xxx>. For now map
OPTION_MASK_<xxx> back into MASK_<xxx>. */
@@ -1320,7 +1325,10 @@ enum r6000_reg_class_enum {
RS6000_CONSTRAINT_wa, /* Any VSX register */
RS6000_CONSTRAINT_wd, /* VSX register for V2DF */
RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
+ RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */
RS6000_CONSTRAINT_ws, /* VSX register for DF */
+ RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
+ RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */
RS6000_CONSTRAINT_MAX
};
===================================================================
@@ -29,77 +29,6 @@ (define_c_enum "unspec"
])
-(define_expand "movsd"
- [(set (match_operand:SD 0 "nonimmediate_operand" "")
- (match_operand:SD 1 "any_operand" ""))]
- "TARGET_HARD_FLOAT && TARGET_FPRS"
- "{ rs6000_emit_move (operands[0], operands[1], SDmode); DONE; }")
-
-(define_split
- [(set (match_operand:SD 0 "gpc_reg_operand" "")
- (match_operand:SD 1 "const_double_operand" ""))]
- "reload_completed
- && ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
- || (GET_CODE (operands[0]) == SUBREG
- && GET_CODE (SUBREG_REG (operands[0])) == REG
- && REGNO (SUBREG_REG (operands[0])) <= 31))"
- [(set (match_dup 2) (match_dup 3))]
- "
-{
- long l;
- REAL_VALUE_TYPE rv;
-
- REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
-
- if (! TARGET_POWERPC64)
- operands[2] = operand_subword (operands[0], 0, 0, SDmode);
- else
- operands[2] = gen_lowpart (SImode, operands[0]);
-
- operands[3] = gen_int_mode (l, SImode);
-}")
-
-(define_insn "movsd_hardfloat"
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,r,m,f,*c*l,!r,*h,!r,!r")
- (match_operand:SD 1 "input_operand" "r,m,r,f,r,h,0,G,Fn"))]
- "(gpc_reg_operand (operands[0], SDmode)
- || gpc_reg_operand (operands[1], SDmode))
- && (TARGET_HARD_FLOAT && TARGET_FPRS)"
- "@
- mr %0,%1
- lwz%U1%X1 %0,%1
- stw%U0%X0 %1,%0
- fmr %0,%1
- mt%0 %1
- mf%1 %0
- nop
- #
- #"
- [(set_attr "type" "*,load,store,fp,mtjmpr,mfjmpr,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,8")])
-
-(define_insn "movsd_softfloat"
- [(set (match_operand:SD 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,r,*h")
- (match_operand:SD 1 "input_operand" "r,r,h,m,r,I,L,R,G,Fn,0"))]
- "(gpc_reg_operand (operands[0], SDmode)
- || gpc_reg_operand (operands[1], SDmode))
- && (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
- "@
- mr %0,%1
- mt%0 %1
- mf%1 %0
- lwz%U1%X1 %0,%1
- stw%U0%X0 %1,%0
- li %0,%1
- lis %0,%v1
- la %0,%a1
- #
- #
- nop"
- [(set_attr "type" "*,mtjmpr,mfjmpr,load,store,*,*,*,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,8,4")])
-
(define_insn "movsd_store"
[(set (match_operand:DD 0 "nonimmediate_operand" "=m")
(unspec:DD [(match_operand:SD 1 "input_operand" "d")]
===================================================================
@@ -253,6 +253,31 @@ (define_mode_iterator FMA_F [
(V2DF "VECTOR_UNIT_ALTIVEC_OR_VSX_P (V2DFmode)")
])
+; Floating point move iterators to combine binary and decimal moves
+(define_mode_iterator FMOVE32 [SF SD])
+
+; Whether a floating point move is ok, don't allow SD without hardware FP
+(define_mode_attr fmove_ok [(SF "")
+ (DF "")
+ (SD "TARGET_HARD_FLOAT && TARGET_FPRS")
+ (DD "")])
+
+; Convert REAL_VALUE to the appropriate bits
+(define_mode_attr real_value_to_target [(SF "REAL_VALUE_TO_TARGET_SINGLE")
+ (DF "REAL_VALUE_TO_TARGET_DOUBLE")
+ (SD "REAL_VALUE_TO_TARGET_DECIMAL32")
+ (DD "REAL_VALUE_TO_TARGET_DECIMAL64")])
+
+; Definitions for load to 32-bit fpr register
+(define_mode_attr f32_lr [(SF "f") (SD "wz")])
+(define_mode_attr f32_lm [(SF "m") (SD "Z")])
+(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+
+; Definitions for store from 32-bit fpr register
+(define_mode_attr f32_sr [(SF "f") (SD "wx")])
+(define_mode_attr f32_sm [(SF "m") (SD "Z")])
+(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
(define_mode_iterator DIFD [DI (DF "!TARGET_E500_DOUBLE") DD])
@@ -7853,15 +7878,17 @@ (define_insn "*movcc_internal1"
;; can produce floating-point values in fixed-point registers. Unless the
;; value is a simple constant or already in memory, we deal with this by
;; allocating memory and copying the value explicitly via that memory location.
-(define_expand "movsf"
- [(set (match_operand:SF 0 "nonimmediate_operand" "")
- (match_operand:SF 1 "any_operand" ""))]
- ""
- "{ rs6000_emit_move (operands[0], operands[1], SFmode); DONE; }")
+
+;; Move 32-bit binary/decimal floating point
+(define_expand "mov<mode>"
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "")
+ (match_operand:FMOVE32 1 "any_operand" ""))]
+ "<fmove_ok>"
+ "{ rs6000_emit_move (operands[0], operands[1], <MODE>mode); DONE; }")
(define_split
- [(set (match_operand:SF 0 "gpc_reg_operand" "")
- (match_operand:SF 1 "const_double_operand" ""))]
+ [(set (match_operand:FMOVE32 0 "gpc_reg_operand" "")
+ (match_operand:FMOVE32 1 "const_double_operand" ""))]
"reload_completed
&& ((GET_CODE (operands[0]) == REG && REGNO (operands[0]) <= 31)
|| (GET_CODE (operands[0]) == SUBREG
@@ -7874,42 +7901,44 @@ (define_split
REAL_VALUE_TYPE rv;
REAL_VALUE_FROM_CONST_DOUBLE (rv, operands[1]);
- REAL_VALUE_TO_TARGET_SINGLE (rv, l);
+ <real_value_to_target> (rv, l);
if (! TARGET_POWERPC64)
- operands[2] = operand_subword (operands[0], 0, 0, SFmode);
+ operands[2] = operand_subword (operands[0], 0, 0, <MODE>mode);
else
operands[2] = gen_lowpart (SImode, operands[0]);
operands[3] = gen_int_mode (l, SImode);
}")
-(define_insn "*movsf_hardfloat"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=!r,!r,m,f,f,m,*c*l,!r,*h,!r,!r")
- (match_operand:SF 1 "input_operand" "r,m,r,f,m,f,r,h,0,G,Fn"))]
- "(gpc_reg_operand (operands[0], SFmode)
- || gpc_reg_operand (operands[1], SFmode))
+(define_insn "mov<mode>_hardfloat"
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r")
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))]
+ "(gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
"@
mr %0,%1
lwz%U1%X1 %0,%1
stw%U0%X0 %1,%0
fmr %0,%1
- lfs%U1%X1 %0,%1
- stfs%U0%X0 %1,%0
+ xxlor %x0,%x1,%x1
+ xxlxor %x0,%x0,%x0
+ <f32_li>
+ <f32_si>
mt%0 %1
mf%1 %0
nop
#
#"
- [(set_attr "type" "*,load,store,fp,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,8")])
+ [(set_attr "type" "*,load,store,fp,vecsimple,vecsimple,fpload,fpstore,mtjmpr,mfjmpr,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")])
-(define_insn "*movsf_softfloat"
- [(set (match_operand:SF 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
- (match_operand:SF 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
- "(gpc_reg_operand (operands[0], SFmode)
- || gpc_reg_operand (operands[1], SFmode))
+(define_insn "*mov<mode>_softfloat"
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
+ (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))]
+ "(gpc_reg_operand (operands[0], <MODE>mode)
+ || gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
"@
mr %0,%1
===================================================================
@@ -2075,9 +2075,18 @@ VSX vector register to hold vector doubl
@item wf
VSX vector register to hold vector float data
+@item wl
+If the LFIWAX instruction is enabled, a floating point register
+
@item ws
VSX vector register to hold scalar float data
+@item wx
+If the STFIWX instruction is enabled, a floating point register
+
+@item wz
+If the LFIWZX instruction is enabled, a floating point register
+
@item wa
Any VSX register
===================================================================
@@ -0,0 +1,20 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7 -mhard-dfp" } */
+/* { dg-final { scan-assembler-times "lfiwzx" 2 } } */
+/* { dg-final { scan-assembler-times "stfiwx" 1 } } */
+/* { dg-final { scan-assembler-not "lfd" } } */
+/* { dg-final { scan-assembler-not "stfd" } } */
+/* { dg-final { scan-assembler-times "dctdp" 2 } } */
+/* { dg-final { scan-assembler-times "dadd" 1 } } */
+/* { dg-final { scan-assembler-times "drsp" 1 } } */
+
+/* Test that power7 can directly load/store SDmode variables without using a
+ bounce buffer. */
+_Decimal32 a;
+
+void inc_dec32 (void)
+{
+ a += (_Decimal32) 1.0;
+}
===================================================================
@@ -0,0 +1,19 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power6 -mhard-dfp" } */
+/* { dg-final { scan-assembler-not "lfiwzx" } } */
+/* { dg-final { scan-assembler-times "lfd" 2 } } */
+/* { dg-final { scan-assembler-times "dctdp" 2 } } */
+/* { dg-final { scan-assembler-times "dadd" 1 } } */
+/* { dg-final { scan-assembler-times "drsp" 1 } } */
+
+/* Test that for power6 we need to use a bounce buffer on the stack to load
+ SDmode variables because the power6 does not have a way to directly load
+ 32-bit values from memory. */
+_Decimal32 a;
+
+void inc_dec32 (void)
+{
+ a += (_Decimal32) 1.0;
+}
===================================================================
@@ -0,0 +1,16 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+/* { dg-final { scan-assembler "xxlxor" } } */
+
+/* Test that we generate xxlor to clear a SFmode register. */
+
+float sum (float *p, unsigned long n)
+{
+ float sum = 0.0f; /* generate xxlxor instead of load */
+ while (n-- > 0)
+ sum += *p++;
+
+ return sum;
+}