From patchwork Thu Dec 8 21:06:19 2011 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Richard Henderson X-Patchwork-Id: 130224 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) by ozlabs.org (Postfix) with SMTP id B02091007D1 for ; Fri, 9 Dec 2011 08:07:20 +1100 (EST) Received: (qmail 19756 invoked by alias); 8 Dec 2011 21:07:00 -0000 Received: (qmail 19549 invoked by uid 22791); 8 Dec 2011 21:06:54 -0000 X-SWARE-Spam-Status: No, hits=-2.2 required=5.0 tests=AWL, BAYES_00, DKIM_SIGNED, DKIM_VALID, FREEMAIL_ENVFROM_END_DIGIT, FREEMAIL_FROM, RCVD_IN_DNSWL_LOW X-Spam-Check-By: sourceware.org Received: from mail-qy0-f175.google.com (HELO mail-qy0-f175.google.com) (209.85.216.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Thu, 08 Dec 2011 21:06:39 +0000 Received: by qcqw6 with SMTP id w6so1800774qcq.20 for ; Thu, 08 Dec 2011 13:06:39 -0800 (PST) Received: by 10.229.65.103 with SMTP id h39mr1234060qci.202.1323378398900; Thu, 08 Dec 2011 13:06:38 -0800 (PST) Received: from anchor.twiddle.home.com ([173.160.232.49]) by mx.google.com with ESMTPS id o15sm12032940qaz.19.2011.12.08.13.06.36 (version=TLSv1/SSLv3 cipher=OTHER); Thu, 08 Dec 2011 13:06:38 -0800 (PST) From: Richard Henderson To: gcc-patches@gcc.gnu.org Cc: richard.earnshaw@arm.com, ramana.radhakrishnan@linaro.org, jakub@redhat.com, dje.gcc@gmail.com, sje@cup.hp.com, rdsandiford@googlemail.com, mingjie.xing@gmail.com, meissner@linux.vnet.ibm.com, Richard Henderson Subject: [PATCH 2/6] rs6000: Implement vec_perm_constv16qi for altivec. Date: Thu, 8 Dec 2011 13:06:19 -0800 Message-Id: <1323378383-9824-3-git-send-email-rth@redhat.com> In-Reply-To: <1323378383-9824-1-git-send-email-rth@redhat.com> References: <1323378383-9824-1-git-send-email-rth@redhat.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org From: Richard Henderson --- gcc/config/rs6000/altivec.md | 13 ++ gcc/config/rs6000/rs6000-protos.h | 1 + gcc/config/rs6000/rs6000.c | 175 +++++++++++++++++++++ gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c | 76 +++++++++ gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c | 19 +++ gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c | 13 ++ 6 files changed, 297 insertions(+), 0 deletions(-) create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c create mode 100644 gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index a3a8d77..7797b65 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -1366,6 +1366,19 @@ "TARGET_ALTIVEC" "") +(define_expand "vec_perm_constv16qi" + [(match_operand:V16QI 0 "register_operand" "") + (match_operand:V16QI 1 "register_operand" "") + (match_operand:V16QI 2 "register_operand" "") + (match_operand:V16QI 3 "" "")] + "TARGET_ALTIVEC" +{ + if (altivec_expand_vec_perm_const (operands)) + DONE; + else + FAIL; +}) + (define_insn "altivec_vrfip" ; ceil [(set (match_operand:V4SF 0 "register_operand" "=v") (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 4650152..f2ed084 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -55,6 +55,7 @@ extern void rs6000_expand_vector_init (rtx, rtx); extern void paired_expand_vector_init (rtx, rtx); extern void rs6000_expand_vector_set (rtx, rtx, int); extern void rs6000_expand_vector_extract (rtx, rtx, int); +extern bool altivec_expand_vec_perm_const (rtx op[4]); extern void build_mask64_2_operands (rtx, rtx *); extern int expand_block_clear (rtx[]); extern int expand_block_move (rtx[]); diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 46ad820..9be155d 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -26202,6 +26202,181 @@ rs6000_emit_parity (rtx dst, rtx src) } } +/* Expand an Altivec constant permutation. Return true if we match + an efficient implementation; false to fall back to VPERM. */ + +bool +altivec_expand_vec_perm_const (rtx operands[4]) +{ + struct altivec_perm_insn { + enum insn_code impl; + unsigned char perm[16]; + }; + static const struct altivec_perm_insn patterns[] = { + { CODE_FOR_altivec_vpkuhum, + { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, + { CODE_FOR_altivec_vpkuwum, + { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, + { CODE_FOR_altivec_vmrghb, + { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, + { CODE_FOR_altivec_vmrghh, + { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, + { CODE_FOR_altivec_vmrghw, + { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, + { CODE_FOR_altivec_vmrglb, + { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, + { CODE_FOR_altivec_vmrglh, + { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, + { CODE_FOR_altivec_vmrglw, + { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } } + }; + + unsigned int i, j, elt, which; + unsigned char perm[16]; + rtx target, op0, op1, sel, x; + bool one_vec; + + target = operands[0]; + op0 = operands[1]; + op1 = operands[2]; + sel = operands[3]; + + /* Unpack the constant selector. */ + for (i = which = 0; i < 16; ++i) + { + rtx e = XVECEXP (sel, 0, i); + elt = INTVAL (e) & 31; + which |= (elt < 16 ? 1 : 2); + perm[i] = elt; + } + + /* Simplify the constant selector based on operands. */ + switch (which) + { + default: + gcc_unreachable (); + + case 3: + one_vec = false; + if (!rtx_equal_p (op0, op1)) + break; + + /* Fold the permutation into a single vector. */ + for (i = 0; i < 16; ++i) + if (perm[i] >= 16) + perm[i] -= 16; + /* FALLTHRU */ + + case 1: + op1 = op0; + one_vec = true; + break; + + case 2: + for (i = 0; i < 16; ++i) + perm[i] -= 16; + op0 = op1; + one_vec = true; + break; + } + + /* Look for splat patterns. */ + if (one_vec) + { + elt = perm[0]; + + for (i = 0; i < 16; ++i) + if (perm[i] != elt) + break; + if (i == 16) + { + emit_insn (gen_altivec_vspltb (target, op0, GEN_INT (elt))); + return true; + } + + if (elt % 2 == 0) + { + for (i = 0; i < 16; i += 2) + if (perm[i] != elt || perm[i + 1] != elt + 1) + break; + if (i == 16) + { + x = gen_reg_rtx (V8HImode); + emit_insn (gen_altivec_vsplth (x, gen_lowpart (V8HImode, op0), + GEN_INT (elt / 2))); + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + + if (elt % 4 == 0) + { + for (i = 0; i < 16; i += 4) + if (perm[i] != elt + || perm[i + 1] != elt + 1 + || perm[i + 2] != elt + 2 + || perm[i + 3] != elt + 3) + break; + if (i == 16) + { + x = gen_reg_rtx (V4SImode); + emit_insn (gen_altivec_vspltw (x, gen_lowpart (V4SImode, op0), + GEN_INT (elt / 4))); + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + } + + /* Look for merge and pack patterns. */ + for (j = 0; j < ARRAY_SIZE (patterns); ++j) + { + bool swapped; + + elt = patterns[j].perm[0]; + if (perm[0] == elt) + swapped = false; + else if (perm[0] == elt + 16) + swapped = true; + else + continue; + for (i = 1; i < 16; ++i) + { + elt = patterns[j].perm[i]; + if (swapped) + elt = (elt >= 16 ? elt - 16 : elt + 16); + else if (one_vec && elt >= 16) + elt -= 16; + if (perm[i] != elt) + break; + } + if (i == 16) + { + enum insn_code icode = patterns[j].impl; + enum machine_mode omode = insn_data[icode].operand[0].mode; + enum machine_mode imode = insn_data[icode].operand[1].mode; + + if (swapped) + x = op0, op0 = op1, op1 = x; + if (imode != V16QImode) + { + op0 = gen_lowpart (imode, op0); + op1 = gen_lowpart (imode, op1); + } + if (omode == V16QImode) + x = target; + else + x = gen_reg_rtx (omode); + emit_insn (GEN_FCN (icode) (x, op0, op1)); + if (omode != V16QImode) + emit_move_insn (target, gen_lowpart (V16QImode, x)); + return true; + } + } + + return false; +} + /* Return an RTX representing where to find the function value of a function returning MODE. */ static rtx diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c new file mode 100644 index 0000000..ee5c5ee --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-1.c @@ -0,0 +1,76 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O -maltivec -mno-vsx" } */ + +typedef unsigned char V __attribute__((vector_size(16))); + +V b1(V x) +{ + return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, 1,1,1,1, 1,1,1,1, }); +} + +V b2(V x) +{ + return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, 2,3,2,3, 2,3,2,3, }); +} + +V b4(V x) +{ + return __builtin_shuffle(x, (V){ 4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7, }); +} + +V p2(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 }); + +} + +V p4(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 }); +} + +V h1(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 }); +} + +V h2(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 }); +} + +V h4(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 }); +} + +V l1(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 }); +} + +V l2(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 }); +} + +V l4(V x, V y) +{ + return __builtin_shuffle(x, y, + (V){ 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 }); +} + +/* { dg-final { scan-assembler-not "vperm" } } */ +/* { dg-final { scan-assembler "vspltb" } } */ +/* { dg-final { scan-assembler "vsplth" } } */ +/* { dg-final { scan-assembler "vspltw" } } */ +/* { dg-final { scan-assembler "vpkuhum" } } */ +/* { dg-final { scan-assembler "vpkuwum" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c new file mode 100644 index 0000000..1b90bb9 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-2.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O -maltivec -mno-vsx" } */ + +typedef unsigned short V __attribute__((vector_size(16))); + +V f2(V x) +{ + return __builtin_shuffle(x, (V){ 1,1,1,1, 1,1,1,1, }); +} + +V f4(V x) +{ + return __builtin_shuffle(x, (V){ 2,3,2,3, 2,3,2,3, }); +} + +/* { dg-final { scan-assembler-not "vperm" } } */ +/* { dg-final { scan-assembler "vsplth" } } */ +/* { dg-final { scan-assembler "vspltw" } } */ diff --git a/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c b/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c new file mode 100644 index 0000000..9598edf --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/altivec-perm-4.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target powerpc_altivec_ok } */ +/* { dg-options "-O -maltivec -mno-vsx" } */ + +typedef unsigned int V __attribute__((vector_size(16))); + +V f4(V x) +{ + return __builtin_shuffle(x, (V){ 1,1,1,1, }); +} + +/* { dg-final { scan-assembler-not "vperm" } } */ +/* { dg-final { scan-assembler "vspltw" } } */