===================================================================
@@ -64,7 +64,8 @@ (define_register_constraint "wa" "rs6000
(define_register_constraint "wd" "rs6000_constraints[RS6000_CONSTRAINT_wd]"
"VSX vector register to hold vector double data or NO_REGS.")
-;; we is not currently used
+(define_register_constraint "we" "rs6000_constraints[RS6000_CONSTRAINT_we]"
+ "VSX register if the -mpower9-vector -m64 options were used or NO_REGS.")
(define_register_constraint "wf" "rs6000_constraints[RS6000_CONSTRAINT_wf]"
"VSX vector register to hold vector float data or NO_REGS.")
@@ -147,6 +148,12 @@ (define_memory_constraint "wG"
"Memory operand suitable for TOC fusion memory references"
(match_operand 0 "toc_fusion_mem_wrapped"))
+(define_constraint "wL"
+ "Int constant that is the element number mfvsrld accesses in a vector."
+ (and (match_code "const_int")
+ (and (match_test "TARGET_DIRECT_MOVE_128")
+ (match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)"))))
+
;; Lq/stq validates the address for load/store quad
(define_memory_constraint "wQ"
"Memory operand suitable for the load/store quad instructions"
===================================================================
@@ -2575,6 +2575,10 @@ rs6000_debug_reg_global (void)
if (TARGET_VSX)
fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
(int)VECTOR_ELEMENT_SCALAR_64BIT);
+
+ if (TARGET_DIRECT_MOVE_128)
+ fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit mfvsrld element",
+ (int)VECTOR_ELEMENT_MFVSRLD_64BIT);
}
@@ -2986,6 +2990,10 @@ rs6000_init_hard_regno_mode_ok (bool glo
rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
}
+ /* Support for new direct moves. */
+ if (TARGET_DIRECT_MOVE_128)
+ rs6000_constraints[RS6000_CONSTRAINT_we] = VSX_REGS;
+
/* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
@@ -3034,7 +3042,7 @@ rs6000_init_hard_regno_mode_ok (bool glo
reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
}
- if (TARGET_DIRECT_MOVE)
+ if (TARGET_DIRECT_MOVE && !TARGET_DIRECT_MOVE_128)
{
reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
@@ -18081,6 +18089,11 @@ rs6000_secondary_reload_simple_move (enu
|| (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
return true;
+ else if (TARGET_DIRECT_MOVE_128 && size == 16
+ && ((to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ || (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)))
+ return true;
+
else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
&& ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
|| (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
@@ -18094,7 +18107,7 @@ rs6000_secondary_reload_simple_move (enu
return false;
}
-/* Power8 helper function for rs6000_secondary_reload, handle all of the
+/* Direct move helper function for rs6000_secondary_reload, handle all of the
special direct moves that involve allocating an extra register, return the
insn code of the helper function if there is such a function or
CODE_FOR_nothing if not. */
@@ -18116,8 +18129,8 @@ rs6000_secondary_reload_direct_move (enu
if (size == 16)
{
/* Handle moving 128-bit values from GPRs to VSX point registers on
- power8 when running in 64-bit mode using XXPERMDI to glue the two
- 64-bit values back together. */
+ ISA 2.07 (power8, power9) when running in 64-bit mode using
+ XXPERMDI to glue the two 64-bit values back together. */
if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
{
cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
@@ -18125,7 +18138,7 @@ rs6000_secondary_reload_direct_move (enu
}
/* Handle moving 128-bit values from VSX point registers to GPRs on
- power8 when running in 64-bit mode using XXPERMDI to get access to the
+ ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
bottom 64-bit value. */
else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
{
@@ -18153,7 +18166,7 @@ rs6000_secondary_reload_direct_move (enu
if (TARGET_POWERPC64 && size == 16)
{
/* Handle moving 128-bit values from GPRs to VSX point registers on
- power8 when running in 64-bit mode using XXPERMDI to glue the two
+ ISA 2.07 when running in 64-bit mode using XXPERMDI to glue the two
64-bit values back together. */
if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
{
@@ -18162,7 +18175,7 @@ rs6000_secondary_reload_direct_move (enu
}
/* Handle moving 128-bit values from VSX point registers to GPRs on
- power8 when running in 64-bit mode using XXPERMDI to get access to the
+ ISA 2.07 when running in 64-bit mode using XXPERMDI to get access to the
bottom 64-bit value. */
else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
{
@@ -18174,8 +18187,8 @@ rs6000_secondary_reload_direct_move (enu
else if (!TARGET_POWERPC64 && size == 8)
{
/* Handle moving 64-bit values from GPRs to floating point registers on
- power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
- values back together. Altivec register classes must be handled
+ ISA 2.07 when running in 32-bit mode using FMRGOW to glue the two
+ 32-bit values back together. Altivec register classes must be handled
specially since a different instruction is used, and the secondary
reload support requires a single instruction class in the scratch
register constraint. However, right now TFmode is not allowed in
@@ -18202,7 +18215,7 @@ rs6000_secondary_reload_direct_move (enu
/* Return whether a move between two register classes can be done either
directly (simple move) or via a pattern that uses a single extra temporary
-   (using power8's direct move in this case.  */
+   (using ISA 2.07's direct move in this case).  */
static bool
rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
@@ -19241,6 +19254,11 @@ rs6000_output_move_128bit (rtx operands[
if (src_gpr_p)
return "#";
+ if (TARGET_DIRECT_MOVE_128 && src_vsx_p)
+ return (WORDS_BIG_ENDIAN
+ ? "mfvsrd %0,%x1\n\tmfvsrld %L0,%x1"
+ : "mfvsrd %L0,%x1\n\tmfvsrld %0,%x1");
+
else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
return "#";
}
@@ -19250,6 +19268,11 @@ rs6000_output_move_128bit (rtx operands[
if (src_vsx_p)
return "xxlor %x0,%x1,%x1";
+ else if (TARGET_DIRECT_MOVE_128 && src_gpr_p)
+ return (WORDS_BIG_ENDIAN
+ ? "mtvsrdd %x0,%1,%L1"
+ : "mtvsrdd %x0,%L1,%1");
+
else if (TARGET_DIRECT_MOVE && src_gpr_p)
return "#";
}
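
For illustration only (this sketch is not part of the patch): with the rs6000_output_move_128bit changes above, a plain 128-bit copy between a VSX register and a GPR pair is expected to compile to the new direct-move sequences instead of going through memory or the longer ISA 2.07 reload sequences. The function names, the -mcpu=power9 -m64 -O2 options, and the register numbers in the comments are assumptions for the sketch; the compiler may still choose a different sequence.

/* Sketch of the expected ISA 3.0 direct moves (assumes -mcpu=power9 -m64 -O2).  */
typedef unsigned long long v2du __attribute__ ((vector_size (16)));

unsigned __int128
vsx_to_gprs (v2du v)
{
  /* VSX -> GPR pair: expected to use the two-instruction sequence,
     e.g. on big endian:
	 mfvsrd  3,34		# doubleword 0 -> first GPR
	 mfvsrld 4,34		# doubleword 1 -> second GPR  */
  union { v2du v; unsigned __int128 i; } u;
  u.v = v;
  return u.i;
}

v2du
gprs_to_vsx (unsigned __int128 x)
{
  /* GPR pair -> VSX: expected to use a single mtvsrdd, with the two GPR
     operands swapped between big and little endian as in the output
     templates above.  */
  union { unsigned __int128 i; v2du v; } u;
  u.i = x;
  return u.v;
}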
===================================================================
@@ -760,31 +760,31 @@ (define_split
"")
(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?<VSa>,?<VSa>,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ, v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,<VSa>,Z,<VSa>,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?<VSa>,?<VSa>,r,we,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ,v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,<VSa>,Z,<VSa>,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
"VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{
return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
- (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+ (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")])
;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
;; use of TImode is for unions. However for plain data movement, slightly
;; favor the vector loads
(define_insn "*vsx_movti_64bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
- (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
&& (register_operand (operands[0], TImode)
|| register_operand (operands[1], TImode))"
{
return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
- (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+ (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")])
(define_insn "*vsx_movti_32bit"
[(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
@@ -1909,11 +1909,11 @@ (define_expand "vsx_extract_<mode>"
;; Optimize cases were we can do a simple or direct move.
;; Or see if we can avoid doing the move at all
(define_insn "*vsx_extract_<mode>_internal1"
- [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,<VS_64reg>,r")
+ [(set (match_operand:<VS_scalar> 0 "register_operand" "=d,<VS_64reg>,r,r")
(vec_select:<VS_scalar>
- (match_operand:VSX_D 1 "register_operand" "d,<VS_64reg>,<VS_64dm>")
+ (match_operand:VSX_D 1 "register_operand" "d,<VS_64reg>,<VS_64dm>,<VS_64dm>")
(parallel
- [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD")])))]
+ [(match_operand:QI 2 "vsx_scalar_64bit" "wD,wD,wD,wL")])))]
"VECTOR_MEM_VSX_P (<MODE>mode) && TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
{
int op0_regno = REGNO (operands[0]);
@@ -1923,14 +1923,16 @@ (define_insn "*vsx_extract_<mode>_intern
return "nop";
if (INT_REGNO_P (op0_regno))
- return "mfvsrd %0,%x1";
+ return ((INTVAL (operands[2]) == VECTOR_ELEMENT_MFVSRLD_64BIT)
+	    ? "mfvsrld %0,%x1"
+ : "mfvsrd %0,%x1");
if (FP_REGNO_P (op0_regno) && FP_REGNO_P (op1_regno))
return "fmr %0,%1";
return "xxlor %x0,%x1,%x1";
}
- [(set_attr "type" "fp,vecsimple,mftgpr")
+ [(set_attr "type" "fp,vecsimple,mftgpr,mftgpr")
(set_attr "length" "4")])
(define_insn "*vsx_extract_<mode>_internal2"
===================================================================
@@ -516,6 +516,10 @@ extern int rs6000_vector_align[];
with scalar instructions. */
#define VECTOR_ELEMENT_SCALAR_64BIT ((BYTES_BIG_ENDIAN) ? 0 : 1)
+/* Element number of the 64-bit value in a 128-bit vector that can be accessed
+   with the ISA 3.0 MFVSRLD instruction.  */
+#define VECTOR_ELEMENT_MFVSRLD_64BIT ((BYTES_BIG_ENDIAN) ? 1 : 0)
+
/* Alignment options for fields in structures for sub-targets following
AIX-like ABI.
ALIGN_POWER word-aligns FP doubles (default AIX ABI).
@@ -571,6 +575,8 @@ extern int rs6000_vector_align[];
#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
#define TARGET_VADDUQM (TARGET_P8_VECTOR && TARGET_POWERPC64)
+#define TARGET_DIRECT_MOVE_128 (TARGET_P9_VECTOR && TARGET_DIRECT_MOVE \
+ && TARGET_POWERPC64)
/* Byte/char syncs were added as phased in for ISA 2.06B, but are not present
in power7, so conditionalize them on p8 features. TImode syncs need quad
@@ -1517,6 +1523,7 @@ enum r6000_reg_class_enum {
RS6000_CONSTRAINT_v, /* Altivec registers */
RS6000_CONSTRAINT_wa, /* Any VSX register */
RS6000_CONSTRAINT_wd, /* VSX register for V2DF */
+ RS6000_CONSTRAINT_we, /* VSX register if ISA 3.0 vector. */
RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */
RS6000_CONSTRAINT_wh, /* FPR register for direct moves. */
===================================================================
@@ -7521,7 +7521,10 @@ (define_split
(match_operand:FMOVE128_GPR 1 "input_operand" ""))]
"reload_completed
&& (int_reg_operand (operands[0], <MODE>mode)
- || int_reg_operand (operands[1], <MODE>mode))"
+ || int_reg_operand (operands[1], <MODE>mode))
+ && (!TARGET_DIRECT_MOVE_128
+ || (!vsx_register_operand (operands[0], <MODE>mode)
+ && !vsx_register_operand (operands[1], <MODE>mode)))"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
===================================================================
@@ -3121,9 +3121,28 @@ asm ("xvadddp %0,%1,%2" : "=wa" (v1) : "
is not correct.
+If an instruction only takes Altivec registers, you do not want to use
+@code{%x<n>}.
+
+@smallexample
+asm ("xsaddqp %0,%1,%2" : "=v" (v1) : "v" (v2), "v" (v3));
+@end smallexample
+
+is correct because the @code{xsaddqp} instruction only takes Altivec
+registers, while:
+
+@smallexample
+asm ("xsaddqp %x0,%x1,%x2" : "=v" (v1) : "v" (v2), "v" (v3));
+@end smallexample
+
+is incorrect.
+
@item wd
VSX vector register to hold vector double data or NO_REGS.
+@item we
+VSX register if the @option{-mpower9-vector} and @option{-m64} options were used or NO_REGS.
+
@item wf
VSX vector register to hold vector float data or NO_REGS.
@@ -3187,6 +3206,16 @@ Floating point register if the LFIWZX in
@item wD
Int constant that is the element number of the 64-bit scalar in a vector.
+@item wF
+Memory operand suitable for power9 fusion load/stores.
+
+@item wG
+Memory operand suitable for TOC fusion memory references.
+
+@item wL
+Int constant that is the element number that the MFVSRLD instruction
+targets.
+
@item wQ
A memory address that will work with the @code{lq} and @code{stq}
instructions.
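
A hypothetical use of the new "we" constraint in user inline asm (illustrative only, not part of the patched documentation; it assumes -mcpu=power9 -m64 so that the constraint is not NO_REGS, and shows the big-endian doubleword order):

vector double
pack_hi_lo (unsigned long hi, unsigned long lo)
{
  vector double v;
  /* mtvsrdd accepts any VSX register, so the %x output modifier is the
     right way to print the register number for the "we" operand.  The
     first GPR operand uses "b" because mtvsrdd treats RA=0 as the
     literal value zero, just as the "b" constraint in the move pattern
     above does.  */
  __asm__ ("mtvsrdd %x0,%1,%2" : "=we" (v) : "b" (hi), "r" (lo));
  return v;
}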
===================================================================
@@ -0,0 +1,32 @@
+/* { dg-do compile { target { powerpc*-*-linux* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-skip-if "" { powerpc*-*-*spe* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+
+/* Check code generation for direct move of 128-bit vector types. */
+
+void
+test (vector double *p)
+{
+ vector double v1 = *p;
+ vector double v2;
+ vector double v3;
+
+ /* Force memory -> FPR load. */
+ __asm__ (" # reg %x0" : "+d" (v1));
+
+  /* Force VSX -> GPR direct move. */
+ v2 = v1;
+ __asm__ (" # reg %0" : "+r" (v2));
+
+ /* Force GPR -> Altivec direct move. */
+ v3 = v2;
+ __asm__ (" # reg %x0" : "+v" (v3));
+ *p = v3;
+}
+
+/* { dg-final { scan-assembler "mfvsrd" } } */
+/* { dg-final { scan-assembler "mfvsrld" } } */
+/* { dg-final { scan-assembler "mtvsrdd" } } */