===================================================================
@@ -562,6 +562,14 @@ (define_predicate "easy_vector_constant_
return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode));
})
+;; Return true for an AltiVec constant that can be synthesized by a
+;; VSPLTIS[BHW] splat followed by a VSLDOI shift (see vspltis_shifted).
+(define_predicate "easy_vector_constant_vsldoi"
+  (and (match_code "const_vector")
+       (and (match_test "TARGET_ALTIVEC")
+	    (and (match_test "easy_altivec_constant (op, mode)")
+		 (match_test "vspltis_shifted (op) != 0")))))
+
;; Return 1 if operand is constant zero (scalars and vectors).
(define_predicate "zero_constant"
(and (match_code "const_int,const_double,const_wide_int,const_vector")
===================================================================
@@ -31,6 +31,7 @@ extern void init_cumulative_args (CUMULA
#endif /* TREE_CODE */
extern bool easy_altivec_constant (rtx, machine_mode);
+extern int vspltis_shifted (rtx);
extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
extern int num_insns_constant (rtx, machine_mode);
===================================================================
@@ -5448,6 +5448,96 @@ vspltis_constant (rtx op, unsigned step,
return true;
}
+/* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
+   instruction, filling in the bottom elements with 0 or -1.
+
+   Return 0 if the constant cannot be generated with VSLDOI.  Return positive
+   for the number of zeroes to shift in, or negative for the number of 0xff
+   bytes to shift in.
+
+   OP is a CONST_VECTOR.  */
+
+int
+vspltis_shifted (rtx op)
+{
+  machine_mode mode = GET_MODE (op);
+  machine_mode inner = GET_MODE_INNER (mode);
+
+  unsigned i, j;
+  unsigned nunits;
+  unsigned mask;
+
+  HOST_WIDE_INT val;
+
+  if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
+    return 0;
+
+  /* We need to create pseudo registers to do the shift, so don't recognize
+     shift vector constants after reload.  */
+  if (!can_create_pseudo_p ())
+    return 0;
+
+  nunits = GET_MODE_NUNITS (mode);
+  mask = GET_MODE_MASK (inner);
+
+  val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
+
+  /* Check if the value can really be the operand of a vspltis[bhw].  */
+  if (EASY_VECTOR_15 (val))
+    ;
+
+  /* Also check if we are loading up the most significant bit, which can be
+     done by loading up -1 and shifting that value left (shifting in zeros).  */
+  else if (EASY_VECTOR_MSB (val, inner))
+    ;
+
+  else
+    return 0;
+
+  /* Check if VAL is present in every element until we find elements that
+     are 0 or all 1 bits.  */
+  for (i = 1; i < nunits; ++i)
+    {
+      unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
+      HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
+
+      /* If the value isn't the splat value, check for the remaining elements
+	 being 0/-1.  */
+      if (val != elt_val)
+	{
+	  if (elt_val == 0)
+	    {
+	      for (j = i+1; j < nunits; ++j)
+		{
+		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
+		  if (const_vector_elt_as_int (op, elt2) != 0)
+		    return 0;
+		}
+
+	      return (nunits - i) * GET_MODE_SIZE (inner);
+	    }
+
+	  else if ((elt_val & mask) == mask)
+	    {
+	      for (j = i+1; j < nunits; ++j)
+		{
+		  unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
+		  if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
+		    return 0;
+		}
+
+	      return -((nunits - i) * GET_MODE_SIZE (inner));
+	    }
+
+	  else
+	    return 0;
+	}
+    }
+
+  /* If all elements are equal, we don't need to do VSLDOI.  */
+  return 0;
+}
+
/* Return true if OP is of the given MODE and can be synthesized
with a vspltisb, vspltish or vspltisw. */
@@ -5512,6 +5602,9 @@ easy_altivec_constant (rtx op, machine_m
if (vspltis_constant (op, step, copies))
return true;
+ if (vspltis_shifted (op) != 0)
+ return true;
+
return false;
}
@@ -5555,7 +5648,7 @@ gen_easy_altivec_constant (rtx op)
const char *
output_vec_const_move (rtx *operands)
{
- int cst, cst2;
+ int cst, cst2, shift;
machine_mode mode;
rtx dest, vec;
@@ -5568,10 +5661,13 @@ output_vec_const_move (rtx *operands)
if (zero_constant (vec, mode))
return "xxlxor %x0,%x0,%x0";
+ if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
+ return "xxlorc %x0,%x0,%x0";
+
if ((mode == V2DImode || mode == V1TImode)
&& INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
&& INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
- return "vspltisw %0,-1";
+ return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
}
if (TARGET_ALTIVEC)
@@ -5580,6 +5676,11 @@ output_vec_const_move (rtx *operands)
if (zero_constant (vec, mode))
return "vxor %0,%0,%0";
+ /* Do we need to construct a value using VSLDOI? */
+ shift = vspltis_shifted (vec);
+ if (shift != 0)
+ return "#";
+
splat_vec = gen_easy_altivec_constant (vec);
gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
operands[1] = XEXP (splat_vec, 0);
===================================================================
@@ -1980,7 +1980,7 @@ typedef struct rs6000_args
&& ((n) & 1) == 0)
#define EASY_VECTOR_MSB(n,mode) \
- (((unsigned HOST_WIDE_INT)n) == \
+ ((((unsigned HOST_WIDE_INT)n) & GET_MODE_MASK (mode)) == \
((((unsigned HOST_WIDE_INT)GET_MODE_MASK (mode)) + 1) >> 1))
===================================================================
@@ -311,6 +311,47 @@ (define_split
operands[4] = gen_rtx_PLUS (op_mode, operands[0], operands[0]);
})
+(define_split
+  [(set (match_operand:VM 0 "altivec_register_operand" "")
+	(match_operand:VM 1 "easy_vector_constant_vsldoi" ""))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode) && can_create_pseudo_p ()"
+  [(set (match_dup 2) (match_dup 3))
+   (set (match_dup 4) (match_dup 5))
+   (set (match_dup 0)
+	(unspec:VM [(match_dup 2)
+		    (match_dup 4)
+		    (match_dup 6)]
+		   UNSPEC_VSLDOI))]
+{
+  rtx op1 = operands[1];
+  int elt = (BYTES_BIG_ENDIAN) ? 0 : GET_MODE_NUNITS (<MODE>mode) - 1;
+  HOST_WIDE_INT val = const_vector_elt_as_int (op1, elt);
+  rtx rtx_val = GEN_INT (val);
+  int shift = vspltis_shifted (op1);
+  int nunits = GET_MODE_NUNITS (<MODE>mode);
+  int i;
+
+  gcc_assert (shift != 0);
+  operands[2] = gen_reg_rtx (<MODE>mode);
+  operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, rtvec_alloc (nunits));
+  operands[4] = gen_reg_rtx (<MODE>mode);
+
+  if (shift < 0)
+    {
+      operands[5] = CONSTM1_RTX (<MODE>mode);
+      operands[6] = GEN_INT (-shift);
+    }
+  else
+    {
+      operands[5] = CONST0_RTX (<MODE>mode);
+      operands[6] = GEN_INT (shift);
+    }
+
+  /* Fill every element of the splat constant (operand 3) with VAL.  */
+  for (i = 0; i < nunits; i++)
+    XVECEXP (operands[3], 0, i) = rtx_val;
+})
+
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI [(reg:SI 109)] UNSPEC_GET_VRSAVE))]
===================================================================
@@ -0,0 +1,34 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */
+/* { dg-options "-mcpu=power6 -maltivec" } */
+
+vector unsigned char
+foo_char (void)
+{
+ return (vector unsigned char) {
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+ 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80
+ };
+}
+
+vector unsigned short
+foo_short (void)
+{
+ return (vector unsigned short) {
+ 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000
+ };
+}
+
+vector unsigned int
+foo_int (void)
+{
+ return (vector unsigned int) {
+ 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u,
+ };
+}
+
+/* { dg-final { scan-assembler-times "vspltisw" 3 } } */
+/* { dg-final { scan-assembler-times "vslb" 1 } } */
+/* { dg-final { scan-assembler-times "vslh" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
===================================================================
@@ -0,0 +1,48 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */
+/* { dg-options "-mcpu=power6 -maltivec" } */
+
+vector unsigned char
+foo_char (void)
+{
+ return (vector unsigned char) {
+#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+#else
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
+#endif
+ };
+}
+
+vector unsigned short
+foo_short (void)
+{
+ return (vector unsigned short) {
+#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ 0x8000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000
+#else
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8000
+#endif
+ };
+}
+
+vector unsigned int
+foo_int (void)
+{
+ return (vector unsigned int) {
+#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ 0x80000000u, 0x00000000u, 0x00000000u, 0x00000000u,
+#else
+ 0x00000000u, 0x00000000u, 0x00000000u, 0x80000000u,
+#endif
+ };
+}
+
+/* { dg-final { scan-assembler-times "vspltisw" 3 } } */
+/* { dg-final { scan-assembler-times "vsldoi" 3 } } */
+/* { dg-final { scan-assembler-times "vslb" 1 } } */
+/* { dg-final { scan-assembler-times "vslh" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
===================================================================
@@ -0,0 +1,48 @@
+/* { dg-do compile { target powerpc*-*-* } } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power6" } } */
+/* { dg-options "-mcpu=power6 -maltivec" } */
+
+
+vector unsigned char
+foo_char (void)
+{
+ return (vector unsigned char) {
+#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ 0x80, 0x80, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+#else
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x80, 0x80
+#endif
+ };
+}
+
+vector unsigned short
+foo_short (void)
+{
+ return (vector unsigned short) {
+#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ 0x8000, 0x8000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
+#else
+ 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x8000, 0x8000
+#endif
+ };
+}
+
+vector unsigned int
+foo_int (void)
+{
+ return (vector unsigned int) {
+#if __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
+ 0x80000000u, 0x80000000u, 0xffffffffu, 0xffffffffu,
+#else
+ 0xffffffffu, 0xffffffffu, 0x80000000u, 0x80000000u,
+#endif
+ };
+}
+
+/* { dg-final { scan-assembler-times "vslb" 1 } } */
+/* { dg-final { scan-assembler-times "vslh" 1 } } */
+/* { dg-final { scan-assembler-times "vslw" 1 } } */
+/* { dg-final { scan-assembler-times "vsldoi" 3 } } */