===================================================================
@@ -233,6 +233,7 @@ extern void rs6000_asm_output_dwarf_pcre
const char *label);
extern void rs6000_asm_output_dwarf_datarel (FILE *file, int size,
const char *label);
+extern const char *rs6000_output_xxpermdi (rtx, rtx, rtx, rtx, rtx);
/* Declare functions in rs6000-c.c */
===================================================================
@@ -39007,6 +39007,60 @@ rs6000_optab_supported_p (int op, machin
return true;
}
}
+
+
+/* Output a xxpermdi instruction that sets a 128-bit vector DEST combining two
+ inputs SRC1 and SRC2.
+
+ If ELEMENT1 is null, use the top 64-bit double word of ARG1. If it is
+ non-NULL, it is a 0 or 1 constant that gives the vector element number to
+ use for extracting the 64-bit double word from ARG1.
+
+ If ELEMENT2 is null, use the top 64-bit double word of ARG2. If it is
+ non-NULL, it is a 0 or 1 constant that gives the vector element number to
+ use for extracting the 64-bit double word from ARG2.
+
+ The element number is based on the user element ordering, set by the
+ endianess and by the -maltivec={le,be} options. */
+
+const char *
+rs6000_output_xxpermdi (rtx dest,
+ rtx src1,
+ rtx src2,
+ rtx element1,
+ rtx element2)
+{
+ int op1_dword = (!element1) ? 0 : INTVAL (element1);
+ int op2_dword = (!element2) ? 0 : INTVAL (element2);
+ rtx xops[10];
+ const char *insn_string;
+
+ gcc_assert (IN_RANGE (op1_dword | op2_dword, 0, 1));
+ xops[0] = dest;
+ xops[1] = src1;
+ xops[2] = src2;
+
+ if (BYTES_BIG_ENDIAN)
+ {
+ xops[3] = GEN_INT (2*op1_dword + op2_dword);
+ insn_string = "xxpermdi %x0,%x1,%x2,%3";
+ }
+ else
+ {
+ if (element1)
+ op1_dword = 1 - op1_dword;
+
+ if (element2)
+ op2_dword = 1 - op2_dword;
+
+ xops[3] = GEN_INT (op1_dword + 2*op2_dword);
+ insn_string = "xxpermdi %x0,%x2,%x1,%3";
+ }
+
+ output_asm_insn (insn_string, xops);
+ return "";
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
===================================================================
@@ -2364,19 +2364,18 @@ (define_insn "*vsx_float_fix_v2df2"
;; Build a V2DF/V2DI vector from two scalars
(define_insn "vsx_concat_<mode>"
- [(set (match_operand:VSX_D 0 "gpc_reg_operand" "=<VSa>,we")
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa,we")
(vec_concat:VSX_D
- (match_operand:<VS_scalar> 1 "gpc_reg_operand" "<VS_64reg>,b")
- (match_operand:<VS_scalar> 2 "gpc_reg_operand" "<VS_64reg>,b")))]
+ (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa,b")
+ (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa,b")))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
if (which_alternative == 0)
- return (BYTES_BIG_ENDIAN
- ? "xxpermdi %x0,%x1,%x2,0"
- : "xxpermdi %x0,%x2,%x1,0");
+ return rs6000_output_xxpermdi (operands[0], operands[1], operands[2],
+ NULL_RTX, NULL_RTX);
else if (which_alternative == 1)
- return (BYTES_BIG_ENDIAN
+ return (VECTOR_ELT_ORDER_BIG
? "mtvsrdd %x0,%1,%2"
: "mtvsrdd %x0,%2,%1");
@@ -2385,6 +2384,50 @@ (define_insn "vsx_concat_<mode>"
}
[(set_attr "type" "vecperm")])
+;; Combiner patterns to allow creating XXPERMDI's to access either double
+;; register in a vector register. Note, rs6000_output_xxpermdi expects
+;; operands[0..2] to be the vector registers.
+(define_insn "*vsx_concat_<mode>_1"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_concat:VSX_D
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))
+ (match_operand:<VS_scalar> 2 "gpc_reg_operand" "wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ return rs6000_output_xxpermdi (operands[0], operands[1], operands[2],
+ operands[3], NULL_RTX);
+})
+
+(define_insn "*vsx_concat_<mode>_2"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_concat:VSX_D
+ (match_operand:<VS_scalar> 1 "gpc_reg_operand" "wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ return rs6000_output_xxpermdi (operands[0], operands[1], operands[2],
+ NULL_RTX, operands[3]);
+})
+
+(define_insn "*vsx_concat_<mode>_3"
+ [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wa")
+ (vec_concat:VSX_D
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 3 "const_0_to_1_operand" "n")]))
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 2 "gpc_reg_operand" "wa")
+ (parallel [(match_operand:QI 4 "const_0_to_1_operand" "n")]))))]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ return rs6000_output_xxpermdi (operands[0], operands[1], operands[2],
+ operands[3], operands[4]);
+})
+
;; Special purpose concat using xxpermdi to glue two single precision values
;; together, relying on the fact that internally scalar floats are represented
;; as doubles. This is used to initialize a V4SF vector with 4 floats
@@ -2585,25 +2628,35 @@ (define_expand "vsx_set_v1ti"
DONE;
})
-;; Set the element of a V2DI/VD2F mode
-(define_insn "vsx_set_<mode>"
- [(set (match_operand:VSX_D 0 "vsx_register_operand" "=wd,?<VSa>")
- (unspec:VSX_D
- [(match_operand:VSX_D 1 "vsx_register_operand" "wd,<VSa>")
- (match_operand:<VS_scalar> 2 "vsx_register_operand" "<VS_64reg>,<VSa>")
- (match_operand:QI 3 "u5bit_cint_operand" "i,i")]
- UNSPEC_VSX_SET))]
+;; Rewrite V2DF/V2DI set in terms of VEC_CONCAT
+(define_expand "vsx_set_<mode>"
+ [(use (match_operand:VSX_D 0 "vsx_register_operand"))
+ (use (match_operand:VSX_D 1 "vsx_register_operand"))
+ (use (match_operand:<VS_scalar> 2 "gpc_reg_operand"))
+ (use (match_operand:QI 3 "const_0_to_1_operand"))]
"VECTOR_MEM_VSX_P (<MODE>mode)"
{
- int idx_first = BYTES_BIG_ENDIAN ? 0 : 1;
- if (INTVAL (operands[3]) == idx_first)
- return \"xxpermdi %x0,%x2,%x1,1\";
- else if (INTVAL (operands[3]) == 1 - idx_first)
- return \"xxpermdi %x0,%x1,%x2,0\";
+ rtx dest = operands[0];
+ rtx vec_reg = operands[1];
+ rtx value = operands[2];
+ rtx ele = operands[3];
+ rtx tmp = gen_reg_rtx (<VS_scalar>mode);
+
+ if (ele == const0_rtx)
+ {
+ emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const1_rtx));
+ emit_insn (gen_vsx_concat_<mode> (dest, value, tmp));
+ DONE;
+ }
+ else if (ele == const1_rtx)
+ {
+ emit_insn (gen_vsx_extract_<mode> (tmp, vec_reg, const0_rtx));
+ emit_insn (gen_vsx_concat_<mode> (dest, tmp, value));
+ DONE;
+ }
else
gcc_unreachable ();
-}
- [(set_attr "type" "vecperm")])
+})
;; Extract a DF/DI element from V2DF/V2DI
;; Optimize cases were we can do a simple or direct move.
===================================================================
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { powerpc*-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+vector unsigned long
+test_vpasted (vector unsigned long high, vector unsigned long low)
+{
+ vector unsigned long res;
+ res[1] = high[1];
+ res[0] = low[0];
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlxor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mlxv[dw][24]x\M} } } */
+/* { dg-final { scan-assembler-not {\mlvx\M} } } */
+/* { dg-final { scan-assembler-not {\mstxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv[dw][24]x\M} } } */
+/* { dg-final { scan-assembler-not {\mstvx\M} } } */
===================================================================
@@ -0,0 +1,25 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mvsx" } */
+
+vector double
+test_vpasted (vector double high, vector double low)
+{
+ vector double res;
+ res[1] = high[1];
+ res[0] = low[0];
+ return res;
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 1 } } */
+/* { dg-final { scan-assembler-not {\mvspltisw\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxlxor\M} } } */
+/* { dg-final { scan-assembler-not {\mxxspltib\M} } } */
+/* { dg-final { scan-assembler-not {\mlxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mlxv[dw][24]x\M} } } */
+/* { dg-final { scan-assembler-not {\mlvx\M} } } */
+/* { dg-final { scan-assembler-not {\mstxvx?\M} } } */
+/* { dg-final { scan-assembler-not {\mstxv[dw][24]x\M} } } */
+/* { dg-final { scan-assembler-not {\mstvx\M} } } */