===================================================================
@@ -140,6 +140,10 @@ (define_constraint "wD"
(and (match_code "const_int")
(match_test "TARGET_VSX && (ival == VECTOR_ELEMENT_SCALAR_64BIT)")))
+(define_constraint "wE"
+ "Vector constant that can be loaded with the XXSPLTIB instruction."
+ (match_test "xxspltib_constant_nosplit (op, mode)"))
+
;; Extended fusion store
(define_memory_constraint "wF"
"Memory operand suitable for power9 fusion load/stores"
@@ -156,6 +160,12 @@ (define_constraint "wL"
(and (match_test "TARGET_DIRECT_MOVE_128")
(match_test "(ival == VECTOR_ELEMENT_MFVSRLD_64BIT)"))))
+;; Generate the XXLORC instruction, which was added in ISA 2.07
+(define_constraint "wM"
+ "vector constant with all 1's, ISA 2.07 and above"
+ (and (match_test "TARGET_P8_VECTOR")
+ (match_operand 0 "all_ones_constant")))
+
;; ISA 3.0 vector d-form addresses
(define_memory_constraint "wO"
"Memory operand suitable for the ISA 3.0 vector d-form instructions."
@@ -166,6 +176,10 @@ (define_memory_constraint "wQ"
"Memory operand suitable for the load/store quad instructions"
(match_operand 0 "quad_memory_operand"))
+(define_constraint "wS"
+ "Vector constant that can be loaded with XXSPLTIB & sign extension."
+ (match_test "xxspltib_constant_split (op, mode)"))
+
;; Altivec style load/store that ignores the bottom bits of the address
(define_memory_constraint "wZ"
"Indexed or indirect memory operand, ignoring the bottom 4 bits"
===================================================================
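
To illustrate the constants the new wE and wS constraints describe, here is a
minimal C sketch (assuming -mcpu=power9; the function names are illustrative,
and the value 33 is chosen to be outside the vspltisw range, cf. the xxspltib
tests below):

#include <altivec.h>

/* wE: byte splat, a single xxspltib covers it.  */
vector signed char
splat_qi (void)
{
  return vec_splats ((signed char) 33);	/* xxspltib */
}

/* wS: word splat too big for vspltisw, xxspltib plus a sign extend.  */
vector int
splat_si (void)
{
  return vec_splats (33);		/* xxspltib; vextsb2w */
}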
@@ -565,6 +565,38 @@ (define_predicate "easy_fp_constant"
}
})
+;; Return 1 if the operand is a CONST_VECTOR or VEC_DUPLICATE of a constant
+;; that can be loaded with an XXSPLTIB instruction and then a VUPKHSB,
+;; VEXTSB2W, or VEXTSB2D instruction.
+
+(define_predicate "xxspltib_constant_split"
+ (match_code "const_vector,vec_duplicate,const_int")
+{
+ int value = 256;
+ int num_insns = -1;
+
+ if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+ return false;
+
+ return num_insns > 1;
+})
+
+
+;; Return 1 if the operand is a CONST_VECTOR or VEC_DUPLICATE of a constant
+;; that can be loaded directly with an XXSPLTIB instruction.
+
+(define_predicate "xxspltib_constant_nosplit"
+ (match_code "const_vector,vec_duplicate,const_int")
+{
+ int value = 256;
+ int num_insns = -1;
+
+ if (!xxspltib_constant_p (op, mode, &num_insns, &value))
+ return false;
+
+ return num_insns == 1;
+})
+
;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
;; vector register without using memory.
(define_predicate "easy_vector_constant"
@@ -583,7 +615,14 @@ (define_predicate "easy_vector_constant"
if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
{
- if (zero_constant (op, mode))
+ int value = 256;
+ int num_insns = -1;
+
+ if (zero_constant (op, mode) || all_ones_constant (op, mode))
+ return true;
+
+ if (TARGET_P9_VECTOR
+ && xxspltib_constant_p (op, mode, &num_insns, &value))
return true;
return easy_altivec_constant (op, mode);
@@ -662,6 +701,11 @@ (define_predicate "zero_constant"
(and (match_code "const_int,const_double,const_wide_int,const_vector")
(match_test "op == CONST0_RTX (mode)")))
+;; Return 1 if operand is constant -1 (scalars and vectors).
+(define_predicate "all_ones_constant"
+ (and (match_code "const_int,const_double,const_wide_int,const_vector")
+ (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)")))
+
;; Return 1 if operand is 0.0.
(define_predicate "zero_fp_constant"
(and (match_code "const_double")
@@ -1024,6 +1068,10 @@ (define_predicate "splat_input_operand"
mode = V2DFmode;
else if (mode == DImode)
mode = V2DImode;
+ else if (mode == SImode && TARGET_P9_VECTOR)
+ mode = V4SImode;
+ else if (mode == SFmode && TARGET_P9_VECTOR)
+ mode = V4SFmode;
else
gcc_unreachable ();
return memory_address_addr_space_p (mode, XEXP (op, 0),
===================================================================
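
A sketch of how the two predicates partition the constants (hypothetical
driver code; op_v16qi_33 and op_v4si_33 stand for splat-of-33 CONST_VECTORs,
and the num_insns values follow xxspltib_constant_p in rs6000.c below):

  int num_insns = -1;
  int value = 256;

  /* V16QImode splat of 33: xxspltib alone, so xxspltib_constant_nosplit.  */
  if (xxspltib_constant_p (op_v16qi_33, V16QImode, &num_insns, &value))
    gcc_assert (num_insns == 1 && value == 33);

  /* V4SImode splat of 33: xxspltib + vextsb2w, so xxspltib_constant_split.
     A splat of 5 would return false here, since vspltisw covers it.  */
  if (xxspltib_constant_p (op_v4si_33, V4SImode, &num_insns, &value))
    gcc_assert (num_insns == 2 && value == 33);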
@@ -31,6 +31,7 @@ extern void init_cumulative_args (CUMULA
#endif /* TREE_CODE */
extern bool easy_altivec_constant (rtx, machine_mode);
+extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
===================================================================
@@ -6241,6 +6241,128 @@ gen_easy_altivec_constant (rtx op)
gcc_unreachable ();
}
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.0
+ instructions (xxspltib, vupkhsb/vextsb2w/vextsb2d).
+
+ Return the number of instructions needed (1 or 2) via the address pointed
+ to by NUM_INSNS_PTR.
+
+ Constants that need only the XXSPLTIB instruction (*NUM_INSNS_PTR set to 1)
+ can go in any VSX register. Constants that also need a sign extend
+ operation (*NUM_INSNS_PTR set to 2) are restricted to the Altivec
+ registers.
+
+ Allow either (const_vector [...]) or (vec_duplicate <const>). If OP is a
+ valid XXSPLTIB constant, return the constant being set via the CONSTANT_PTR
+ pointer. */
+
+bool
+xxspltib_constant_p (rtx op,
+ machine_mode mode,
+ int *num_insns_ptr,
+ int *constant_ptr)
+{
+ size_t nunits = GET_MODE_NUNITS (mode);
+ size_t i;
+ HOST_WIDE_INT value;
+ rtx element;
+
+ /* Set the returned values to out of bound values. */
+ *num_insns_ptr = -1;
+ *constant_ptr = 256;
+
+ if (!TARGET_P9_VECTOR)
+ return false;
+
+ if (mode == VOIDmode)
+ mode = GET_MODE (op);
+
+ else if (mode != GET_MODE (op))
+ return false;
+
+ /* Handle (vec_duplicate <constant>). */
+ if (GET_CODE (op) == VEC_DUPLICATE)
+ {
+ if (mode != V16QImode && mode != V8HImode && mode != V4SImode
+ && mode != V2DImode)
+ return false;
+
+ element = XEXP (op, 0);
+ if (!CONST_INT_P (element))
+ return false;
+
+ value = INTVAL (element);
+ if (!IN_RANGE (value, -128, 127))
+ return false;
+ }
+
+ /* Handle (const_vector [...]). */
+ else if (GET_CODE (op) == CONST_VECTOR)
+ {
+ if (mode != V16QImode && mode != V8HImode && mode != V4SImode
+ && mode != V2DImode)
+ return false;
+
+ element = CONST_VECTOR_ELT (op, 0);
+ if (!CONST_INT_P (element))
+ return false;
+
+ value = INTVAL (element);
+ if (!IN_RANGE (value, -128, 127))
+ return false;
+
+ for (i = 1; i < nunits; i++)
+ {
+ element = CONST_VECTOR_ELT (op, i);
+ if (!CONST_INT_P (element))
+ return false;
+
+ if (value != INTVAL (element))
+ return false;
+ }
+
+ /* See if we could generate vspltisw/vspltish directly instead of
+ xxspltib + sign extend. Special case 0/-1 to allow getting
+ any VSX register instead of an Altivec register. */
+ if (!IN_RANGE (value, -1, 0) && EASY_VECTOR_15 (value)
+ && (mode == V4SImode || mode == V8HImode))
+ return false;
+ }
+
+ /* Handle integer constants being loaded into the upper part of the VSX
+ register as a scalar. If the value isn't 0/-1, only allow it if
+ the mode can go in Altivec registers. */
+ else if (CONST_INT_P (op))
+ {
+ if (!SCALAR_INT_MODE_P (mode))
+ return false;
+
+ value = INTVAL (op);
+ if (!IN_RANGE (value, -128, 127))
+ return false;
+
+ if (!IN_RANGE (value, -1, 0)
+ && (reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_VALID) == 0)
+ return false;
+ }
+
+ else
+ return false;
+
+ /* Return # of instructions and the constant byte for XXSPLTIB. */
+ if (mode == V16QImode)
+ *num_insns_ptr = 1;
+
+ else if (IN_RANGE (value, -1, 0))
+ *num_insns_ptr = 1;
+
+ else
+ *num_insns_ptr = 2;
+
+ *constant_ptr = (int) value;
+ return true;
+}
+
const char *
output_vec_const_move (rtx *operands)
{
@@ -6254,23 +6376,60 @@ output_vec_const_move (rtx *operands)
if (TARGET_VSX)
{
+ bool dest_vmx_p = ALTIVEC_REGNO_P (REGNO (dest));
+ int xxspltib_value = 256;
+ int num_insns = -1;
+
if (zero_constant (vec, mode))
- return "xxlxor %x0,%x0,%x0";
+ {
+ if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,0";
+
+ else if (dest_vmx_p)
+ return "vspltisw %0,0";
+
+ else
+ return "xxlxor %x0,%x0,%x0";
+ }
+
+ if (all_ones_constant (vec, mode))
+ {
+ if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,255";
+
+ else if (dest_vmx_p)
+ return "vspltisw %0,-1";
- if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
- return "xxlorc %x0,%x0,%x0";
+ else if (TARGET_P8_VECTOR)
+ return "xxlorc %x0,%x0,%x0";
- if ((mode == V2DImode || mode == V1TImode)
- && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
- && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
- return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
+ else
+ gcc_unreachable ();
+ }
+
+ if (TARGET_P9_VECTOR
+ && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
+ {
+ if (num_insns == 1)
+ {
+ operands[2] = GEN_INT (xxspltib_value & 0xff);
+ return "xxspltib %x0,%2";
+ }
+
+ return "#";
+ }
}
if (TARGET_ALTIVEC)
{
rtx splat_vec;
+
+ gcc_assert (ALTIVEC_REGNO_P (REGNO (dest)));
if (zero_constant (vec, mode))
- return "vxor %0,%0,%0";
+ return "vspltisw %0,0";
+
+ if (all_ones_constant (vec, mode))
+ return "vspltisw %0,-1";
/* Do we need to construct a value using VSLDOI? */
shift = vspltis_shifted (vec);
@@ -6543,6 +6702,15 @@ rs6000_expand_vector_init (rtx target, r
return;
}
+ /* Word values on ISA 3.0 can use mtvsrws, lxvwsx, or vspltisw. V4SF is
+ complicated since scalars are stored as doubles in the registers. */
+ if (TARGET_P9_VECTOR && mode == V4SImode && all_same
+ && VECTOR_MEM_VSX_P (mode))
+ {
+ emit_insn (gen_vsx_splat_v4si (target, XVECEXP (vals, 0, 0)));
+ return;
+ }
+
/* With single precision floating point on VSX, know that internally single
precision is actually represented as a double, and either make 2 V2DF
vectors, and convert these vectors to single precision, or do one
@@ -6551,14 +6719,23 @@ rs6000_expand_vector_init (rtx target, r
{
if (all_same)
{
- rtx freg = gen_reg_rtx (V4SFmode);
- rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
- rtx cvt = ((TARGET_XSCVDPSPN)
- ? gen_vsx_xscvdpspn_scalar (freg, sreg)
- : gen_vsx_xscvdpsp_scalar (freg, sreg));
+ rtx op0 = XVECEXP (vals, 0, 0);
+
+ if (TARGET_P9_VECTOR)
+ emit_insn (gen_vsx_splat_v4sf (target, op0));
- emit_insn (cvt);
- emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
+ else
+ {
+ rtx freg = gen_reg_rtx (V4SFmode);
+ rtx sreg = force_reg (SFmode, op0);
+ rtx cvt = ((TARGET_XSCVDPSPN)
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
+
+ emit_insn (cvt);
+ emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg,
+ const0_rtx));
+ }
}
else
{
@@ -8326,12 +8503,16 @@ rs6000_legitimize_reload_address (rtx x,
{
bool reg_offset_p = reg_offset_addressing_ok_p (mode);
- /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
- DFmode/DImode MEM. */
+ /* Nasty hack for vsx_splat_v2df/v2di load from mem, which takes a
+ DFmode/DImode MEM. Ditto for ISA 3.0 vsx_splat_v4sf/v4si. */
if (reg_offset_p
&& opnum == 1
&& ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
- || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
+ || (mode == DImode && recog_data.operand_mode[0] == V2DImode)
+ || (mode == SFmode && recog_data.operand_mode[0] == V4SFmode
+ && TARGET_P9_VECTOR)
+ || (mode == SImode && recog_data.operand_mode[0] == V4SImode
+ && TARGET_P9_VECTOR)))
reg_offset_p = false;
/* We must recognize output that we have already generated ourselves. */
@@ -20111,10 +20292,8 @@ rs6000_output_move_128bit (rtx operands[
if (dest_gpr_p)
return "#";
- else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
- return "xxlxor %x0,%x0,%x0";
-
- else if (TARGET_ALTIVEC && dest_vmx_p)
+ else if ((dest_vmx_p && TARGET_ALTIVEC)
+ || (dest_vsx_p && TARGET_VSX))
return output_vec_const_move (operands);
}
===================================================================
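
At the source level, the new TARGET_P9_VECTOR path in rs6000_expand_vector_init
means a variable V4SI splat no longer needs multiple moves through the FP side
(a sketch, assuming -mcpu=power9; cf. the vsx-splat-1.c test below):

vector int
splat_var (int x)
{
  return (vector int) { x, x, x, x };	/* mtvsrws for a GPR input,
					   lxvwsx for a memory input */
}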
@@ -55,8 +55,7 @@ (define_mode_iterator VSX_L [V16QI
(KF "FLOAT128_VECTOR_P (KFmode)")
(TF "FLOAT128_VECTOR_P (TFmode)")])
-;; Iterator for memory move. Handle TImode specially to allow
-;; it to use gprs as well as vsx registers.
+;; Iterator for memory moves.
(define_mode_iterator VSX_M [V16QI
V8HI
V4SI
@@ -65,18 +64,8 @@ (define_mode_iterator VSX_M [V16QI
V2DF
V1TI
(KF "FLOAT128_VECTOR_P (KFmode)")
- (TF "FLOAT128_VECTOR_P (TFmode)")])
-
-(define_mode_iterator VSX_M2 [V16QI
- V8HI
- V4SI
- V2DI
- V4SF
- V2DF
- V1TI
- (KF "FLOAT128_VECTOR_P (KFmode)")
- (TF "FLOAT128_VECTOR_P (TFmode)")
- (TI "TARGET_VSX_TIMODE")])
+ (TF "FLOAT128_VECTOR_P (TFmode)")
+ (TI "TARGET_VSX_TIMODE")])
;; Map into the appropriate load/store name based on the type
(define_mode_attr VSm [(V16QI "vw4")
@@ -270,6 +259,10 @@ (define_mode_attr VS_64dm [(V2DF "wk")
(define_mode_attr VS_64reg [(V2DF "ws")
(V2DI "wi")])
+;; Iterators for loading constants with xxspltib
+(define_mode_iterator VSINT_84 [V4SI V2DI])
+(define_mode_iterator VSINT_842 [V8HI V4SI V2DI])
+
;; Constants for creating unspecs
(define_c_enum "unspec"
[UNSPEC_VSX_CONCAT
@@ -299,6 +292,7 @@ (define_c_enum "unspec"
UNSPEC_VSX_XVCVUXDDP
UNSPEC_VSX_XVCVDPSXDS
UNSPEC_VSX_XVCVDPUXDS
+ UNSPEC_VSX_SIGN_EXTEND
])
;; VSX moves
@@ -769,92 +763,177 @@ (define_split
(const_int 64)))]
"")
-(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=ZwO,<VSr>,<VSr>,?ZwO,?<VSa>,?<VSa>,r,we,wQ,?&r,??Y,??r,??r,<VSr>,?<VSa>,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,ZwO,<VSr>,<VSa>,ZwO,<VSa>,we,b,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (register_operand (operands[0], <MODE>mode)
- || register_operand (operands[1], <MODE>mode))"
+;; Vector constants that can be generated with XXSPLTIB, which was added in
+;; ISA 3.0.  Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
+(define_insn "xxspltib_v16qi"
+ [(set (match_operand:V16QI 0 "vsx_register_operand" "=wa")
+ (vec_duplicate:V16QI (match_operand:SI 1 "s8bit_cint_operand" "i")))]
+ "TARGET_P9_VECTOR"
{
- return rs6000_output_move_128bit (operands);
+ operands[2] = GEN_INT (INTVAL (operands[1]) & 0xff);
+ return "xxspltib %x0,%2";
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,mffgpr,mftgpr,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
- (set_attr "length" "4,4,4,4,4,4,8,4,12,12,12,12,16,4,4,*,16,4,4")])
+ [(set_attr "type" "vecperm")])
-;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
-;; use of TImode is for unions. However for plain data movement, slightly
-;; favor the vector loads
-(define_insn "*vsx_movti_64bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=ZwO,wa,wa,wa,r,we,v,v,wZ,wQ,&r,Y,r,r,?r")
- (match_operand:TI 1 "input_operand" "wa,ZwO,wa,O,we,b,W,wZ,v,r,wQ,r,Y,r,n"))]
- "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
- && (register_operand (operands[0], TImode)
- || register_operand (operands[1], TImode))"
+(define_insn "xxspltib_<mode>_nosplit"
+ [(set (match_operand:VSINT_842 0 "vsx_register_operand" "=wa")
+ (match_operand:VSINT_842 1 "xxspltib_constant_nosplit" "wE"))]
+ "TARGET_P9_VECTOR"
{
- return rs6000_output_move_128bit (operands);
+ rtx op1 = operands[1];
+ int value = 256;
+ int num_insns = -1;
+
+ if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
+ || num_insns != 1)
+ gcc_unreachable ();
+
+ operands[2] = GEN_INT (value & 0xff);
+ return "xxspltib %x0,%2";
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,mffgpr,mftgpr,vecsimple,vecstore,vecload,store,load,store,load,*,*")
- (set_attr "length" "4,4,4,4,8,4,16,4,4,8,8,8,8,8,8")])
+ [(set_attr "type" "vecperm")])
-(define_insn "*vsx_movti_32bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=ZwO,wa,wa,wa,v,v,wZ,Q,Y,????r,????r,????r,r")
- (match_operand:TI 1 "input_operand" "wa,ZwO,wa,O,W,wZ,v,r,r,Q,Y,r,n"))]
- "! TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
- && (register_operand (operands[0], TImode)
- || register_operand (operands[1], TImode))"
+(define_insn_and_split "*xxspltib_<mode>_split"
+ [(set (match_operand:VSINT_842 0 "altivec_register_operand" "=v")
+ (match_operand:VSINT_842 1 "xxspltib_constant_split" "wS"))]
+ "TARGET_P9_VECTOR"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
- switch (which_alternative)
- {
- case 0:
- return "stxvd2x %x1,%y0";
+ int value = 256;
+ int num_insns = -1;
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx tmp = ((can_create_pseudo_p ())
+ ? gen_reg_rtx (V16QImode)
+ : gen_lowpart (V16QImode, op0));
- case 1:
- return "lxvd2x %x0,%y1";
+ if (!xxspltib_constant_p (op1, <MODE>mode, &num_insns, &value)
+ || num_insns != 2)
+ gcc_unreachable ();
- case 2:
- return "xxlor %x0,%x1,%x1";
+ emit_insn (gen_xxspltib_v16qi (tmp, GEN_INT (value)));
- case 3:
- return "xxlxor %x0,%x0,%x0";
+ if (<MODE>mode == V2DImode)
+ emit_insn (gen_vsx_sign_extend_qi_v2di (op0, tmp));
- case 4:
- return output_vec_const_move (operands);
+ else if (<MODE>mode == V4SImode)
+ emit_insn (gen_vsx_sign_extend_qi_v4si (op0, tmp));
- case 5:
- return "stvx %1,%y0";
+ else if (<MODE>mode == V8HImode)
+ emit_insn (gen_altivec_vupkhsb (op0, tmp));
- case 6:
- return "lvx %0,%y1";
-
- case 7:
- if (TARGET_STRING)
- return \"stswi %1,%P0,16\";
-
- case 8:
- return \"#\";
-
- case 9:
- /* If the address is not used in the output, we can use lsi. Otherwise,
- fall through to generating four loads. */
- if (TARGET_STRING
- && ! reg_overlap_mentioned_p (operands[0], operands[1]))
- return \"lswi %0,%P1,16\";
- /* ... fall through ... */
-
- case 10:
- case 11:
- case 12:
- return \"#\";
- default:
- gcc_unreachable ();
- }
+ else
+ gcc_unreachable ();
+
+ DONE;
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,store,load,load, *, *")
- (set_attr "update" " *, *, *, *, *, *, *, yes, yes, yes, yes, *, *")
- (set_attr "length" " 4, 4, 4, 4, 8, 4, 4, 16, 16, 16, 16,16,16")
- (set (attr "cell_micro") (if_then_else (match_test "TARGET_STRING")
- (const_string "always")
- (const_string "conditional")))])
+ [(set_attr "type" "vecperm")
+ (set_attr "length" "8")])
+
+
+;; VSX store VSX load
+;; VSX move direct move
+;; direct move GPR store
+;; GPR load GPR move
+;; P9 const. AVX const.
+;; P9 const. 0
+;; -1 GPR const.
+;; AVX store AVX load
+
+;; Prefer using vector registers over GPRs.  Prefer using ISA 3.0's XXSPLTIB
+;; or Altivec VSPLTISW 0/-1 over XXLXOR/XXLORC to set a register to all 0's or
+;; all 1's, since the machine does not have to wait for the previous
+;; instruction using the register being set (such as a store waiting on a slow
+;; instruction).  But generate XXLXOR/XXLORC if it will avoid a register move.
+(define_insn "*vsx_mov<mode>_64bit"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=wOZ, <VSa>,
+ <VSa>, *r,
+ *wo, $Y,
+ ??r, ??r,
+ wo, v,
+ v, ?<VSa>,
+ ?wh, ??r,
+ wZ, v")
+
+ (match_operand:VSX_M 1 "input_operand" "<VSa>, wOZ,
+ <VSa>, wo,
+ r, ??r,
+ Y, ??r,
+ wE, W,
+ wS, j,
+ wM, WjwM,
+ wZ, v"))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "vecstore, vecload,
+ vecsimple, mftgpr,
+ mffgpr, store,
+ load, *,
+ vecsimple, vecsimple,
+ vecsimple, vecsimple,
+ vecsimple, *,
+ vecstore, vecload")
+
+ (set_attr "length" "4, 4,
+ 4, 8,
+ 4, 8,
+ 8, 8,
+ 4, 16,
+ 8, 4,
+ 4, 16,
+ 4, 4")])
+
+;; VSX store VSX load
+;; VSX move GPR store
+;; GPR load GPR move
+;; P9 const. AVX const.
+;; P9 const. 0
+;; -1 GPR const.
+;; AVX store AVX load
+
+(define_insn "*vsx_mov<mode>_32bit"
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=wOZ, <VSa>,
+ <VSa>, $Y,
+ ???r, ???r,
+ wo, v,
+ v, ?<VSa>,
+ ?wh, ???r,
+ wZ, v")
+
+ (match_operand:VSX_M 1 "input_operand" "<VSa>, wOZ,
+ <VSa>, ???r,
+ Y, ???r,
+ wE, W,
+ wS, j,
+ wM, WjwM,
+ wZ, v"))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (register_operand (operands[0], <MODE>mode)
+ || register_operand (operands[1], <MODE>mode))"
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "vecstore, vecload,
+ vecsimple, store,
+ load, *,
+ vecsimple, vecsimple,
+ vecsimple, vecsimple,
+ vecsimple, *,
+ vecstore, vecload")
+
+ (set_attr "length" "4, 4,
+ 4, 16,
+ 16, 16,
+ 4, 16,
+ 8, 4,
+ 4, 32,
+ 4, 4")])
;; Explicit load/store expanders for the builtin functions
(define_expand "vsx_load_<mode>"
@@ -2354,7 +2433,52 @@ (define_insn "vsx_splat_<mode>"
lxvdsx %x0,%y1"
[(set_attr "type" "vecperm,vecperm,vecload,vecperm,vecperm,vecload")])
-;; V4SF/V4SI splat
+;; V4SF/V4SI splat (ISA 3.0)
+;; When SI's are allowed in VSX registers, add XXSPLTW support
+(define_expand "vsx_splat_<mode>"
+ [(set (match_operand:VSX_W 0 "vsx_register_operand" "")
+ (vec_duplicate:VSX_W
+ (match_operand:<VS_scalar> 1 "splat_input_operand" "")))]
+ "TARGET_P9_VECTOR"
+{
+ if (MEM_P (operands[1]))
+ operands[1] = rs6000_address_for_fpconvert (operands[1]);
+ else if (!REG_P (operands[1]))
+ operands[1] = force_reg (<VS_scalar>mode, operands[1]);
+})
+
+(define_insn "*vsx_splat_v4si_internal"
+ [(set (match_operand:V4SI 0 "vsx_register_operand" "=wa,wa")
+ (vec_duplicate:V4SI
+ (match_operand:SI 1 "reg_or_indexed_operand" "r,Z")))]
+ "TARGET_P9_VECTOR"
+ "@
+ mtvsrws %x0,%1
+ lxvwsx %x0,%y1"
+ [(set_attr "type" "mftgpr,vecload")])
+
+;; V4SF splat (ISA 3.0)
+(define_insn_and_split "*vsx_splat_v4sf_internal"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa,wa,wa")
+ (vec_duplicate:V4SF
+ (match_operand:SF 1 "reg_or_indexed_operand" "Z,wy,r")))]
+ "TARGET_P9_VECTOR"
+ "@
+ lxvwsx %x0,%y1
+ #
+ mtvsrws %x0,%1"
+ "&& reload_completed && vsx_register_operand (operands[1], SFmode)"
+ [(set (match_dup 0)
+ (unspec:V4SF [(match_dup 1)] UNSPEC_VSX_CVDPSPN))
+ (set (match_dup 0)
+ (vec_duplicate:V4SF
+ (vec_select:SF (match_dup 0)
+ (parallel [(const_int 0)]))))]
+ ""
+ [(set_attr "type" "vecload,vecperm,mftgpr")
+ (set_attr "length" "4,8,4")])
+
+;; V4SF/V4SI splat from a vector element
(define_insn "vsx_xxspltw_<mode>"
[(set (match_operand:VSX_W 0 "vsx_register_operand" "=wf,?<VSa>")
(vec_duplicate:VSX_W
@@ -2597,21 +2721,50 @@ (define_insn_and_split "*vsx_reduc_<VEC_
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "short_cint_operand" ""))
- (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
- (mem:VSX_M2 (plus:P (match_dup 0)
- (match_operand:P 3 "int_reg_operand" ""))))]
+ (set (match_operand:VSX_M 2 "vsx_register_operand" "")
+ (mem:VSX_M (plus:P (match_dup 0)
+ (match_operand:P 3 "int_reg_operand" ""))))]
"TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
- "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
(define_peephole
[(set (match_operand:P 0 "base_reg_operand" "")
(match_operand:P 1 "short_cint_operand" ""))
- (set (match_operand:VSX_M2 2 "vsx_register_operand" "")
- (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "")
- (match_dup 0))))]
+ (set (match_operand:VSX_M 2 "vsx_register_operand" "")
+ (mem:VSX_M (plus:P (match_operand:P 3 "int_reg_operand" "")
+ (match_dup 0))))]
"TARGET_VSX && TARGET_P8_FUSION && !TARGET_P9_VECTOR"
- "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3"
+ "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M:VSm>x %x2,%0,%3"
[(set_attr "length" "8")
(set_attr "type" "vecload")])
+
+
+;; ISA 3.0 vector sign extend support
+
+(define_insn "vsx_sign_extend_qi_<mode>"
+ [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
+ (unspec:VSINT_84
+ [(match_operand:V16QI 1 "vsx_register_operand" "v")]
+ UNSPEC_VSX_SIGN_EXTEND))]
+ "TARGET_P9_VECTOR"
+ "vextsb2<wd> %0,%1"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_sign_extend_hi_<mode>"
+ [(set (match_operand:VSINT_84 0 "vsx_register_operand" "=v")
+ (unspec:VSINT_84
+ [(match_operand:V8HI 1 "vsx_register_operand" "v")]
+ UNSPEC_VSX_SIGN_EXTEND))]
+ "TARGET_P9_VECTOR"
+ "vextsh2<wd> %0,%1"
+ [(set_attr "type" "vecsimple")])
+
+(define_insn "*vsx_sign_extend_si_v2di"
+ [(set (match_operand:V2DI 0 "vsx_register_operand" "=v")
+ (unspec:V2DI [(match_operand:V4SI 1 "vsx_register_operand" "v")]
+ UNSPEC_VSX_SIGN_EXTEND))]
+ "TARGET_P9_VECTOR"
+ "vextsw2d %0,%1"
+ [(set_attr "type" "vecsimple")])
===================================================================
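
The *xxspltib_<mode>_split pattern above is what turns a non-V16QI splat into
the two-instruction sequence; for example (a sketch, assuming -mcpu=power9;
cf. the xxspltib tests below):

vector long long
splat_di (void)
{
  return (vector long long) { 33, 33 };	/* xxspltib; vextsb2d */
}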
@@ -3214,6 +3214,9 @@ Floating point register if the LFIWZX in
@item wD
Int constant that is the element number of the 64-bit scalar in a vector.
+@item wE
+Vector constant that can be loaded with the XXSPLTIB instruction.
+
@item wF
Memory operand suitable for power9 fusion load/stores.
@@ -3224,6 +3227,9 @@ Memory operand suitable for TOC fusion m
Int constant that is the element number that the MFVSRLD instruction
targets.
+@item wM
+Vector constant with all bits set to 1.
+
@item wO
A memory operand suitable for the ISA 3.0 vector d-form instructions.
@@ -3231,6 +3237,9 @@ A memory operand suitable for the ISA 3.
A memory address that will work with the @code{lq} and @code{stq}
instructions.
+@item wS
+Vector constant that can be loaded with XXSPLTIB & sign extension.
+
@item h
@samp{MQ}, @samp{CTR}, or @samp{LINK} register
===================================================================
@@ -0,0 +1,27 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+
+#include <altivec.h>
+
+vector int
+foo_r (int a)
+{
+ return (vector int) { a, a, a, a }; /* mtvsrws */
+}
+
+vector int
+foo_r2 (int a)
+{
+ return vec_splats (a); /* mtvsrws */
+}
+
+vector int
+foo_p (int *a)
+{
+ return (vector int) { *a, *a, *a, *a }; /* lxvwsx */
+}
+
+/* { dg-final { scan-assembler-times "mtvsrws" 2 } } */
+/* { dg-final { scan-assembler-times "lxvwsx" 1 } } */
===================================================================
@@ -0,0 +1,38 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+
+#include <altivec.h>
+
+vector float
+foo_r (float a)
+{
+ return (vector float) { a, a, a, a }; /* xscvdpspn/xxspltw */
+}
+
+vector float
+foo_r2 (float a)
+{
+ return vec_splats (a); /* xscvdpspn/xxspltw */
+}
+
+vector float
+foo_g (float *a)
+{
+ float f = *a;
+
+ __asm__ (" # %0" : "+r" (f));
+ return (vector float) { f, f, f, f }; /* mtvsrws */
+}
+
+vector float
+foo_p (float *a)
+{
+ return (vector float) { *a, *a, *a, *a }; /* lxvwsx */
+}
+
+/* { dg-final { scan-assembler-times "xscvdpspn" 2 } } */
+/* { dg-final { scan-assembler-times "xxspltw" 2 } } */
+/* { dg-final { scan-assembler-times "mtvsrws" 1 } } */
+/* { dg-final { scan-assembler-times "lxvwsx" 1 } } */
===================================================================
@@ -0,0 +1,61 @@
+/* { dg-do compile { target { powerpc64le-*-* } } } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
+/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+
+#include <altivec.h>
+
+typedef vector signed char v16qi_t;
+typedef vector short v8hi_t;
+typedef vector int v4si_t;
+typedef vector long long v2di_t;
+
+void v16qi_0a (v16qi_t *p) { *p = (v16qi_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; }
+void v8hi_0a (v8hi_t *p) { *p = (v8hi_t) { 0, 0, 0, 0, 0, 0, 0, 0 }; }
+void v4si_0a (v4si_t *p) { *p = (v4si_t) { 0, 0, 0, 0 }; }
+void v2di_0a (v2di_t *p) { *p = (v2di_t) { 0, 0 }; }
+
+void v16qi_0b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)0); }
+void v8hi_0b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)0); }
+void v4si_0b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)0); }
+void v2di_0b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)0); }
+
+void v16qi_m1a (v16qi_t *p) { *p = (v16qi_t) { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; }
+void v8hi_m1a (v8hi_t *p) { *p = (v8hi_t) { -1, -1, -1, -1, -1, -1, -1, -1 }; }
+void v4si_m1a (v4si_t *p) { *p = (v4si_t) { -1, -1, -1, -1 }; }
+void v2di_m1a (v2di_t *p) { *p = (v2di_t) { -1, -1 }; }
+
+void v16qi_m1b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)-1); }
+void v8hi_m1b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)-1); }
+void v4si_m1b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)-1); }
+void v2di_m1b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)-1); }
+
+void v16qi_5a (v16qi_t *p) { *p = (v16qi_t) { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 }; }
+void v8hi_5a (v8hi_t *p) { *p = (v8hi_t) { 5, 5, 5, 5, 5, 5, 5, 5 }; }
+void v4si_5a (v4si_t *p) { *p = (v4si_t) { 5, 5, 5, 5 }; }
+void v2di_5a (v2di_t *p) { *p = (v2di_t) { 5, 5 }; }
+
+void v16qi_5b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)5); }
+void v8hi_5b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)5); }
+void v4si_5b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)5); }
+void v2di_5b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)5); }
+
+void v16qi_33a (v16qi_t *p) { *p = (v16qi_t) { 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33 }; }
+void v8hi_33a (v8hi_t *p) { *p = (v8hi_t) { 33, 33, 33, 33, 33, 33, 33, 33 }; }
+void v4si_33a (v4si_t *p) { *p = (v4si_t) { 33, 33, 33, 33 }; }
+void v2di_33a (v2di_t *p) { *p = (v2di_t) { 33, 33 }; }
+
+void v16qi_33b (v16qi_t *p) { *p = (v16qi_t) vec_splats ((signed char)33); }
+void v8hi_33b (v8hi_t *p) { *p = (v8hi_t) vec_splats ((short)33); }
+void v4si_33b (v4si_t *p) { *p = (v4si_t) vec_splats ((int)33); }
+void v2di_33b (v2di_t *p) { *p = (v2di_t) vec_splats ((long long)33); }
+
+/* { dg-final { scan-assembler "xxspltib" } } */
+/* { dg-final { scan-assembler "vextsb2d" } } */
+/* { dg-final { scan-assembler "vextsb2w" } } */
+/* { dg-final { scan-assembler "vupk\[hl\]sb" } } */
+/* { dg-final { scan-assembler-not "lxvd2x" } } */
+/* { dg-final { scan-assembler-not "lxvw4x" } } */
+/* { dg-final { scan-assembler-not "lxv " } } */
+/* { dg-final { scan-assembler-not "lxvx" } } */
+/* { dg-final { scan-assembler-not "lvx" } } */
===================================================================
@@ -3,7 +3,7 @@
/* { dg-require-effective-target powerpc_vsx_ok } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power7" } } */
/* { dg-options "-O3 -mcpu=power7" } */
-/* { dg-final { scan-assembler "xxlxor" } } */
+/* { dg-final { scan-assembler "xxlxor\|vspltis\[bhw\]" } } */
/* { dg-final { scan-assembler-not "lxvd2x" } } */
/* { dg-final { scan-assembler-not "lxvw4x" } } */
/* { dg-final { scan-assembler-not "lvx" } } */
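
The relaxed pattern reflects the output_vec_const_move change above: a zero
destined for an Altivec register is now emitted as vspltisw %0,0 instead of
xxlxor, so either mnemonic can appear at -mcpu=power7 depending on which
register class the vector lands in. For example (illustrative):

vector int
zero (void)
{
  return (vector int) { 0, 0, 0, 0 };	/* xxlxor or vspltisw */
}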