From patchwork Wed Oct 26 16:01:59 2011
X-Patchwork-Submitter: Richard Henderson
X-Patchwork-Id: 121937
Message-ID: <4EA82EF7.50903@redhat.com>
Date: Wed, 26 Oct 2011 09:01:59 -0700
From: Richard Henderson
To: Ulrich Weigand
CC: gcc-patches@gcc.gnu.org, irar@il.ibm.com, dje.gcc@gmail.com,
    davem@davemloft.net, developer@sandoe-acoustics.co.uk
Subject: Re: [PATCH 0/6] More vector permutation work
References: <201110261430.p9QEUEkg005708@d06av02.portsmouth.uk.ibm.com>
In-Reply-To: <201110261430.p9QEUEkg005708@d06av02.portsmouth.uk.ibm.com>

On 10/26/2011 07:30 AM, Ulrich Weigand wrote:
> This fails since for u == 4 and mode == V4SFmode it attempts to expand
> a V4SFmode shift, which is unsupported.
>
> Shouldn't this be using the mode of the selector rather than the mode
> of the result in any case?

Yes, it should use the mode of the selector.  And doing just that is
enough to fix the bug.  But I noticed that the actual results for a
constant permutation were much nastier than they ought to be.

Try this.

Iain, this might solve your case too; no, I can't test it myself in a
cross build, because libgfortran needs the whole cross environment --
system headers and everything.

r~

diff --git a/gcc/optabs.c b/gcc/optabs.c
index 9afc911..736d826 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -6912,7 +6912,7 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   enum insn_code icode;
   enum machine_mode qimode;
   unsigned int i, w, e, u;
-  rtx tmp, sel_qi;
+  rtx tmp, sel_qi = NULL;
   rtvec vec;
 
   if (!target || GET_MODE (target) != mode)
@@ -6946,23 +6946,23 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   /* Fall back to a constant byte-based permutation.  */
   if (qimode != VOIDmode)
     {
-      icode = direct_optab_handler (vec_perm_const_optab, qimode);
-      if (icode != CODE_FOR_nothing)
+      vec = rtvec_alloc (w);
+      for (i = 0; i < e; ++i)
 	{
-	  vec = rtvec_alloc (w);
-	  for (i = 0; i < e; ++i)
-	    {
-	      unsigned int j, this_e;
+	  unsigned int j, this_e;
 
-	      this_e = INTVAL (XVECEXP (sel, 0, i));
-	      this_e &= 2 * e - 1;
-	      this_e *= u;
+	  this_e = INTVAL (XVECEXP (sel, 0, i));
+	  this_e &= 2 * e - 1;
+	  this_e *= u;
 
-	      for (j = 0; j < u; ++j)
-		RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
-	    }
-	  sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
+	  for (j = 0; j < u; ++j)
+	    RTVEC_ELT (vec, i * u + j) = GEN_INT (this_e + j);
+	}
+      sel_qi = gen_rtx_CONST_VECTOR (qimode, vec);
 
+      icode = direct_optab_handler (vec_perm_const_optab, qimode);
+      if (icode != CODE_FOR_nothing)
+	{
 	  tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
 				   gen_lowpart (qimode, v0),
 				   gen_lowpart (qimode, v1), sel_qi);
@@ -6989,47 +6989,53 @@ expand_vec_perm (enum machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   if (icode == CODE_FOR_nothing)
     return NULL_RTX;
 
-  /* Multiply each element by its byte size.  */
-  if (u == 2)
-    sel = expand_simple_binop (mode, PLUS, sel, sel, sel, 0, OPTAB_DIRECT);
-  else
-    sel = expand_simple_binop (mode, ASHIFT, sel, GEN_INT (exact_log2 (u)),
-			       sel, 0, OPTAB_DIRECT);
-  gcc_assert (sel != NULL);
-
-  /* Broadcast the low byte each element into each of its bytes.  */
-  vec = rtvec_alloc (w);
-  for (i = 0; i < w; ++i)
-    {
-      int this_e = i / u * u;
-      if (BYTES_BIG_ENDIAN)
-	this_e += u - 1;
-      RTVEC_ELT (vec, i) = GEN_INT (this_e);
-    }
-  tmp = gen_rtx_CONST_VECTOR (qimode, vec);
-  sel = gen_lowpart (qimode, sel);
-  sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
-  gcc_assert (sel != NULL);
-
-  /* Add the byte offset to each byte element.  */
-  /* Note that the definition of the indicies here is memory ordering,
-     so there should be no difference between big and little endian.  */
-  vec = rtvec_alloc (w);
-  for (i = 0; i < w; ++i)
-    RTVEC_ELT (vec, i) = GEN_INT (i % u);
-  tmp = gen_rtx_CONST_VECTOR (qimode, vec);
-  sel = expand_simple_binop (qimode, PLUS, sel, tmp, sel, 0, OPTAB_DIRECT);
-  gcc_assert (sel != NULL);
+  if (sel_qi == NULL)
+    {
+      /* Multiply each element by its byte size.  */
+      enum machine_mode selmode = GET_MODE (sel);
+      if (u == 2)
+	sel = expand_simple_binop (selmode, PLUS, sel, sel,
+				   sel, 0, OPTAB_DIRECT);
+      else
+	sel = expand_simple_binop (selmode, ASHIFT, sel,
+				   GEN_INT (exact_log2 (u)),
+				   sel, 0, OPTAB_DIRECT);
+      gcc_assert (sel != NULL);
+
+      /* Broadcast the low byte each element into each of its bytes.  */
+      vec = rtvec_alloc (w);
+      for (i = 0; i < w; ++i)
+	{
+	  int this_e = i / u * u;
+	  if (BYTES_BIG_ENDIAN)
+	    this_e += u - 1;
+	  RTVEC_ELT (vec, i) = GEN_INT (this_e);
+	}
+      tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+      sel = gen_lowpart (qimode, sel);
+      sel = expand_vec_perm (qimode, sel, sel, tmp, NULL);
+      gcc_assert (sel != NULL);
+
+      /* Add the byte offset to each byte element.  */
+      /* Note that the definition of the indicies here is memory ordering,
+	 so there should be no difference between big and little endian.  */
+      vec = rtvec_alloc (w);
+      for (i = 0; i < w; ++i)
+	RTVEC_ELT (vec, i) = GEN_INT (i % u);
+      tmp = gen_rtx_CONST_VECTOR (qimode, vec);
+      sel_qi = expand_simple_binop (qimode, PLUS, sel, tmp,
+				    sel, 0, OPTAB_DIRECT);
+      gcc_assert (sel_qi != NULL);
+    }
 
   tmp = expand_vec_perm_1 (icode, gen_lowpart (qimode, target),
 			   gen_lowpart (qimode, v0),
-			   gen_lowpart (qimode, v1), sel);
+			   gen_lowpart (qimode, v1), sel_qi);
   if (tmp)
     tmp = gen_lowpart (mode, tmp);
   return tmp;
 }
 
-
 /* Return insn code for a conditional operator with a comparison in
    mode CMODE, unsigned if UNS is true, resulting in a value of mode
    VMODE.  */
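
For readers following along, here is a minimal standalone sketch, independent
of GCC, of the arithmetic the constant-permutation path above performs: each
element index of the selector is masked to the two-operand range with
& (2 * e - 1), scaled by the element size u, and then expanded into u
consecutive byte indices for the QImode fallback.  The names elem_sel and
byte_sel and the V4SF-like parameters (e = 4 elements of u = 4 bytes) are
chosen only for illustration, not taken from the patch.

/* Standalone sketch: widen a constant element-level permutation selector
   into the byte-level selector, mirroring the values the patch emits via
   GEN_INT in the vec_perm_const_optab fallback.  Illustrative only.  */
#include <stdio.h>

int
main (void)
{
  enum { E = 4, U = 4, W = E * U };	     /* elements, bytes/element, bytes */
  unsigned int elem_sel[E] = { 2, 7, 1, 4 }; /* two-operand element selector  */
  unsigned int byte_sel[W];
  unsigned int i, j;

  for (i = 0; i < E; ++i)
    {
      /* Mask to the valid two-vector index range, then scale by the
	 element size in bytes.  */
      unsigned int this_e = elem_sel[i] & (2 * E - 1);
      this_e *= U;

      /* Expand into U consecutive byte indices.  */
      for (j = 0; j < U; ++j)
	byte_sel[i * U + j] = this_e + j;
    }

  for (i = 0; i < W; ++i)
    printf ("%u ", byte_sel[i]);
  printf ("\n");
  return 0;
}

Run on its own, this prints 8 9 10 11 28 29 30 31 4 5 6 7 16 17 18 19 for
the selector { 2, 7, 1, 4 }; indices are in memory order, so as the patch
comment notes there is no big/little-endian difference in this constant path.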