@@ -1366,6 +1366,19 @@
"TARGET_ALTIVEC"
"")
+;; Constant byte permutation of two V16QI registers (operands 1 and 2)
+;; into operand 0, steered by the selector in operand 3.  All of the work
+;; is done by altivec_expand_vec_perm_const; if it returns false the
+;; expansion FAILs.
+(define_expand "vec_perm_constv16qi"
+  [(match_operand:V16QI 0 "register_operand" "")
+   (match_operand:V16QI 1 "register_operand" "")
+   (match_operand:V16QI 2 "register_operand" "")
+   (match_operand:V16QI 3 "" "")]
+  "TARGET_ALTIVEC"
+{
+  if (!altivec_expand_vec_perm_const (operands))
+    FAIL;
+  DONE;
+})
+
(define_insn "altivec_vrfip" ; ceil
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")]
@@ -55,6 +55,7 @@ extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
extern void rs6000_expand_vector_extract (rtx, rtx, int);
+/* Returns true when a non-VPERM sequence was emitted for the constant
+   permutation; OP holds destination, two inputs and the selector.  */
+extern bool altivec_expand_vec_perm_const (rtx op[4]);
extern void build_mask64_2_operands (rtx, rtx *);
extern int expand_block_clear (rtx[]);
extern int expand_block_move (rtx[]);
@@ -26202,6 +26202,181 @@ rs6000_emit_parity (rtx dst, rtx src)
}
}
+/* Return true if the 16-byte selector PERM copies the WIDTH-byte element
+   that starts at byte PERM[0] into every WIDTH-byte slot, and PERM[0] is
+   a multiple of WIDTH.  */
+
+static bool
+altivec_perm_splat_p (const unsigned char *perm, unsigned int width)
+{
+  unsigned int i;
+
+  if (perm[0] % width != 0)
+    return false;
+  for (i = 0; i < 16; ++i)
+    if (perm[i] != perm[0] + i % width)
+      return false;
+  return true;
+}
+
+/* Expand an Altivec constant permutation without using VPERM where
+   possible.  OPERANDS[0] is the destination, OPERANDS[1] and OPERANDS[2]
+   are the two inputs, and OPERANDS[3] is the constant selector, whose 16
+   elements are read with XVECEXP and INTVAL.  Return true if a splat,
+   merge or pack sequence was emitted; return false, with nothing
+   emitted, so that the caller falls back to VPERM.  */
+
+bool
+altivec_expand_vec_perm_const (rtx operands[4])
+{
+  struct perm_pattern {
+    enum insn_code icode;
+    unsigned char sel[16];
+  };
+  static const struct perm_pattern known[] = {
+    { CODE_FOR_altivec_vpkuhum,
+      {  1,  3,  5,  7,  9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
+    { CODE_FOR_altivec_vpkuwum,
+      {  2,  3,  6,  7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
+    { CODE_FOR_altivec_vmrghb,
+      {  0, 16,  1, 17,  2, 18,  3, 19,  4, 20,  5, 21,  6, 22,  7, 23 } },
+    { CODE_FOR_altivec_vmrghh,
+      {  0,  1, 16, 17,  2,  3, 18, 19,  4,  5, 20, 21,  6,  7, 22, 23 } },
+    { CODE_FOR_altivec_vmrghw,
+      {  0,  1,  2,  3, 16, 17, 18, 19,  4,  5,  6,  7, 20, 21, 22, 23 } },
+    { CODE_FOR_altivec_vmrglb,
+      {  8, 24,  9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
+    { CODE_FOR_altivec_vmrglh,
+      {  8,  9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
+    { CODE_FOR_altivec_vmrglw,
+      {  8,  9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+  };
+
+  rtx dest = operands[0];
+  rtx in0 = operands[1];
+  rtx in1 = operands[2];
+  rtx selector = operands[3];
+  rtx tmp;
+  unsigned char bytes[16];
+  unsigned int i, j, elt, used;
+  bool single;
+
+  /* Read the selector, reducing each index modulo 32.  USED gets bit 0
+     when some index names a byte of the first input (0-15) and bit 1
+     when some index names a byte of the second input (16-31).  */
+  used = 0;
+  for (i = 0; i < 16; ++i)
+    {
+      elt = INTVAL (XVECEXP (selector, 0, i)) & 31;
+      bytes[i] = elt;
+      used |= (elt < 16 ? 1 : 2);
+    }
+
+  /* Reduce to a single-input permutation when only one input is
+     referenced, or when both inputs are the same rtx.  */
+  switch (used)
+    {
+    case 1:
+      /* Only the first input is referenced.  */
+      in1 = in0;
+      single = true;
+      break;
+
+    case 2:
+      /* Only the second input is referenced; renumber to 0-15.  */
+      for (i = 0; i < 16; ++i)
+        bytes[i] -= 16;
+      in0 = in1;
+      single = true;
+      break;
+
+    case 3:
+      single = rtx_equal_p (in0, in1) != 0;
+      if (single)
+        {
+          /* Identical inputs: fold every index onto the first one.  */
+          for (i = 0; i < 16; ++i)
+            if (bytes[i] >= 16)
+              bytes[i] -= 16;
+          in1 = in0;
+        }
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Splats of a byte, halfword or word, tried in that order.  */
+  if (single)
+    {
+      elt = bytes[0];
+
+      if (altivec_perm_splat_p (bytes, 1))
+        {
+          emit_insn (gen_altivec_vspltb (dest, in0, GEN_INT (elt)));
+          return true;
+        }
+
+      if (altivec_perm_splat_p (bytes, 2))
+        {
+          tmp = gen_reg_rtx (V8HImode);
+          emit_insn (gen_altivec_vsplth (tmp, gen_lowpart (V8HImode, in0),
+                                         GEN_INT (elt / 2)));
+          emit_move_insn (dest, gen_lowpart (V16QImode, tmp));
+          return true;
+        }
+
+      if (altivec_perm_splat_p (bytes, 4))
+        {
+          tmp = gen_reg_rtx (V4SImode);
+          emit_insn (gen_altivec_vspltw (tmp, gen_lowpart (V4SImode, in0),
+                                         GEN_INT (elt / 4)));
+          emit_move_insn (dest, gen_lowpart (V16QImode, tmp));
+          return true;
+        }
+    }
+
+  /* Merge and pack patterns, also tried with the two inputs exchanged.  */
+  for (j = 0; j < ARRAY_SIZE (known); ++j)
+    {
+      const unsigned char *want = known[j].sel;
+      enum insn_code icode;
+      enum machine_mode omode, imode;
+      bool swapped;
+
+      /* The first index decides whether to compare against the table
+         entry as written or with the roles of the inputs reversed.  */
+      if (bytes[0] == want[0])
+        swapped = false;
+      else if (bytes[0] == want[0] + 16)
+        swapped = true;
+      else
+        continue;
+
+      for (i = 1; i < 16; ++i)
+        {
+          elt = want[i];
+          if (swapped)
+            /* Table indices are 0-31, so flipping bit 4 moves an index
+               from one input to the other.  */
+            elt ^= 16;
+          else if (single)
+            /* With a single input the selector was folded to 0-15.  */
+            elt &= 15;
+          if (bytes[i] != elt)
+            break;
+        }
+      if (i < 16)
+        continue;
+
+      icode = known[j].icode;
+      omode = insn_data[icode].operand[0].mode;
+      imode = insn_data[icode].operand[1].mode;
+
+      if (swapped)
+        {
+          tmp = in0;
+          in0 = in1;
+          in1 = tmp;
+        }
+      if (imode != V16QImode)
+        {
+          in0 = gen_lowpart (imode, in0);
+          in1 = gen_lowpart (imode, in1);
+        }
+
+      /* Write straight to the destination when the insn produces V16QI;
+         otherwise go through a temporary in the insn's own mode.  */
+      tmp = (omode == V16QImode ? dest : gen_reg_rtx (omode));
+      emit_insn (GEN_FCN (icode) (tmp, in0, in1));
+      if (omode != V16QImode)
+        emit_move_insn (dest, gen_lowpart (V16QImode, tmp));
+      return true;
+    }
+
+  return false;
+}
+
/* Return an RTX representing where to find the function value of a
function returning MODE. */
static rtx
new file mode 100644
@@ -0,0 +1,76 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O -maltivec -mno-vsx" } */
+
+typedef unsigned char V __attribute__((vector_size(16)));
+
+/* Every selector byte is 1: replicate byte 1 of X into all 16 bytes.
+   This is the byte-splat case; the dg-final directives look for vspltb.  */
+V b1(V x)
+{
+ return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, });
+}
+
+/* The selector repeats bytes 2,3: replicate the halfword at bytes 2-3 of
+   X.  This is the halfword-splat case; dg-final looks for vsplth.  */
+V b2(V x)
+{
+ return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, 2,3,2,3, 2,3,2,3, });
+}
+
+/* The selector repeats bytes 4-7: replicate the word at bytes 4-7 of X.
+   This is the word-splat case; dg-final looks for vspltw.  */
+V b4(V x)
+{
+ return __builtin_shuffle(x, (V){ 4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7, });
+}
+
+/* Select the odd-numbered bytes of X followed by those of Y.  The
+   selector equals the vpkuhum entry in altivec_expand_vec_perm_const.  */
+V p2(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 });
+
+}
+
+/* Select bytes 2,3 of each 4-byte group of X, then of Y.  The selector
+   equals the vpkuwum entry in altivec_expand_vec_perm_const.  */
+V p4(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 });
+}
+
+/* Interleave bytes 0-7 of X with bytes 0-7 of Y (indices 16-23).  The
+   selector equals the vmrghb entry in altivec_expand_vec_perm_const.  */
+V h1(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
+}
+
+/* Interleave the byte pairs in bytes 0-7 of X with those in bytes 0-7 of
+   Y.  The selector equals the vmrghh entry in
+   altivec_expand_vec_perm_const.  */
+V h2(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 });
+}
+
+/* Interleave the 4-byte groups in bytes 0-7 of X with those in bytes 0-7
+   of Y.  The selector equals the vmrghw entry in
+   altivec_expand_vec_perm_const.  */
+V h4(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 });
+}
+
+/* Interleave bytes 8-15 of X with bytes 8-15 of Y (indices 24-31).  The
+   selector equals the vmrglb entry in altivec_expand_vec_perm_const.  */
+V l1(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 });
+}
+
+/* Interleave the byte pairs in bytes 8-15 of X with those in bytes 8-15
+   of Y.  The selector equals the vmrglh entry in
+   altivec_expand_vec_perm_const.  */
+V l2(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 });
+}
+
+/* Interleave the 4-byte groups in bytes 8-15 of X with those in bytes
+   8-15 of Y.  The selector equals the vmrglw entry in
+   altivec_expand_vec_perm_const.  */
+V l4(V x, V y)
+{
+ return __builtin_shuffle(x, y,
+ (V){ 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 });
+}
+
+/* { dg-final { scan-assembler-not "vperm" } } */
+/* { dg-final { scan-assembler "vspltb" } } */
+/* { dg-final { scan-assembler "vsplth" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
+/* { dg-final { scan-assembler "vpkuhum" } } */
+/* { dg-final { scan-assembler "vpkuwum" } } */
new file mode 100644
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O -maltivec -mno-vsx" } */
+
+typedef unsigned short V __attribute__((vector_size(16)));
+
+/* Every lane selects halfword element 1 of X.  Presumably this is the
+   case the vsplth dg-final directive is aimed at.  */
+V f2(V x)
+{
+ return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, });
+}
+
+/* The lanes alternate between halfword elements 2 and 3 of X, i.e. the
+   32 bits they form are repeated four times.  Presumably this is the
+   case the vspltw dg-final directive is aimed at.  */
+V f4(V x)
+{
+ return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, });
+}
+
+/* { dg-final { scan-assembler-not "vperm" } } */
+/* { dg-final { scan-assembler "vsplth" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O -maltivec -mno-vsx" } */
+
+typedef unsigned int V __attribute__((vector_size(16)));
+
+/* Every lane selects word element 1 of X; the dg-final directives look
+   for vspltw and for the absence of vperm.  */
+V f4(V x)
+{
+ return __builtin_shuffle(x, (V){ 1,1,1,1, });
+}
+
+/* { dg-final { scan-assembler-not "vperm" } } */
+/* { dg-final { scan-assembler "vspltw" } } */
From: Richard Henderson <rth@twiddle.net>

---
 gcc/config/rs6000/altivec.md                      |   13 ++
 gcc/config/rs6000/rs6000-protos.h                 |    1 +
 gcc/config/rs6000/rs6000.c                        |  175 +++++++++++++++++++++
 gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c |   76 +++++++++
 gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c |   19 +++
 gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c |   13 ++
 6 files changed, 297 insertions(+), 0 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c