From patchwork Mon May  5 16:49:54 2014
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Evgeny Stupachenko <evstupac@gmail.com>
X-Patchwork-Id: 345786
Return-Path: 
 <gcc-patches-return-366609-incoming=patchwork.ozlabs.org@gcc.gnu.org>
X-Original-To: incoming@patchwork.ozlabs.org
Delivered-To: patchwork-incoming@bilbo.ozlabs.org
Received: from sourceware.org (server1.sourceware.org [209.132.180.131])
	(using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256
	bits)) (No client certificate requested)
	by ozlabs.org (Postfix) with ESMTPS id AB503140E56
	for <incoming@patchwork.ozlabs.org>;
	Tue,  6 May 2014 02:50:05 +1000 (EST)
DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender
	:mime-version:in-reply-to:references:date:message-id:subject
	:from:to:cc:content-type; q=dns; s=default; b=xWWLfesE/6Mgew4UEK
	THBYG6bTZpIePeLOK1fl8k0dIhSVIrLbCWYzxR9EzPLpoeFnJN0BfMffyZGwyKtX
	9uzWpPECdrAgPztRV2RqIYMccD4GmP/wKJmfge1ofxkCSB6gFMfyG+3Qnt/9KyEw
	2pPAGRa9743NS2czx/5KIPXwA=
DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id
	:list-unsubscribe:list-archive:list-post:list-help:sender
	:mime-version:in-reply-to:references:date:message-id:subject
	:from:to:cc:content-type; s=default; bh=9dsPpuhp1W60Wf9AnZtDxbUI
	GqA=; b=QxxUfhUXsoJGRL8znl37e4ZYdEqWRnuYSIXQzFTwdQlsEOBLbQibkr1B
	PISOFumJBgj84vW6zSMnVekBNbhSshT+DiZghb10ytVFHmtGBWeL62t2SHrUKNwY
	y18F/14bZ9wxWegVMQ2ZgnDZz1UKZX+Aw+DddTdmbLcf2HZF8X0=
Received: (qmail 20836 invoked by alias); 5 May 2014 16:49:58 -0000
Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm
Precedence: bulk
List-Id: <gcc-patches.gcc.gnu.org>
List-Unsubscribe: 
 <mailto:gcc-patches-unsubscribe-incoming=patchwork.ozlabs.org@gcc.gnu.org>
List-Archive: <http://gcc.gnu.org/ml/gcc-patches/>
List-Post: <mailto:gcc-patches@gcc.gnu.org>
List-Help: <mailto:gcc-patches-help@gcc.gnu.org>
Sender: gcc-patches-owner@gcc.gnu.org
Delivered-To: mailing list gcc-patches@gcc.gnu.org
Received: (qmail 20826 invoked by uid 89); 5 May 2014 16:49:58 -0000
Authentication-Results: sourceware.org; auth=none
X-Virus-Found: No
X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL, BAYES_00,
	FREEMAIL_FROM, RCVD_IN_DNSWL_LOW,
	SPF_PASS autolearn=ham version=3.3.2
X-HELO: mail-oa0-f47.google.com
Received: from mail-oa0-f47.google.com (HELO mail-oa0-f47.google.com)
	(209.85.219.47) by sourceware.org
	(qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted)
	ESMTPS; Mon, 05 May 2014 16:49:57 +0000
Received: by mail-oa0-f47.google.com with SMTP id i7so2674228oag.20 for
	<gcc-patches@gcc.gnu.org>; Mon, 05 May 2014 09:49:54 -0700 (PDT)
MIME-Version: 1.0
X-Received: by 10.182.22.18 with SMTP id z18mr32687262obe.42.1399308594848;
	Mon, 05 May 2014 09:49:54 -0700 (PDT)
Received: by 10.76.170.39 with HTTP; Mon, 5 May 2014 09:49:54 -0700 (PDT)
In-Reply-To: <535EC233.7000500@redhat.com>
References: 
 <CAOvf_xx3-VpgN8YDxJBPvzzNGNykUPoLdU6xThW_QBN7byy5rw@mail.gmail.com>
	<535E909A.7040205@redhat.com>
	<CAOvf_xygddE6yxkp=+SZtVz+bvVciH5YfH1Cwy1WxdPHCgrbtg@mail.gmail.com>
	<535EC233.7000500@redhat.com>
Date: Mon, 5 May 2014 20:49:54 +0400
Message-ID: 
 <CAOvf_xxq6PNN2KLpA8yoB2jjgmy-UAOvij_ghgsKPkZnsu2Rkg@mail.gmail.com>
Subject: Re: [PATCH 1/2, x86] Add palignr support for AVX2.
From: Evgeny Stupachenko <evstupac@gmail.com>
To: Richard Henderson <rth@redhat.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>, Richard Biener <rguenther@suse.de>,
	Uros Bizjak <ubizjak@gmail.com>
X-IsSubscribed: yes

Is the following patch ok? It passes bootstrap and make check.

    chance to succeed.  */
@@ -43015,14 +43021,26 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)
   unsigned i, nelt = d->nelt;
   unsigned min, max;
   bool in_order, ok;
-  rtx shift, target;
+  rtx shift, shift1, target, tmp;
   struct expand_vec_perm_d dcopy;

-  /* Even with AVX, palignr only operates on 128-bit vectors.  */
-  if (!TARGET_SSSE3 || GET_MODE_SIZE (d->vmode) != 16)
+  /* SSSE3 is required to apply PALIGNR on 16 bytes operands.  */
+  if (GET_MODE_SIZE (d->vmode) == 16)
+    {
+      if (!TARGET_SSSE3)
+       return false;
+    }
+  /* AVX2 is required to apply PALIGNR on 32 bytes operands.  */
+  else if (GET_MODE_SIZE (d->vmode) == 32)
+    {
+      if (!TARGET_AVX2)
+       return false;
+    }
+  /* Other sizes are not supported.  */
+  else
     return false;

-  min = nelt, max = 0;
+  min = 2 * nelt, max = 0;
   for (i = 0; i < nelt; ++i)
     {
       unsigned e = d->perm[i];
@@ -43041,9 +43059,35 @@ expand_vec_perm_palignr (struct expand_vec_perm_d *d)

   dcopy = *d;
   shift = GEN_INT (min * GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
-  target = gen_reg_rtx (TImode);
-  emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
-                                 gen_lowpart (TImode, d->op0), shift));
+  shift1 = GEN_INT ((min - nelt / 2) *
+          GET_MODE_BITSIZE (GET_MODE_INNER (d->vmode)));
+
+  if (GET_MODE_SIZE (d->vmode) != 32)
+    {
+      target = gen_reg_rtx (TImode);
+      emit_insn (gen_ssse3_palignrti (target, gen_lowpart (TImode, d->op1),
+                                     gen_lowpart (TImode, d->op0), shift));
+    }
+  else
+    {
+      target = gen_reg_rtx (V2TImode);
+      tmp = gen_reg_rtx (V4DImode);
+      emit_insn (gen_avx2_permv2ti (tmp,
+                                   gen_lowpart (V4DImode, d->op0),
+                                   gen_lowpart (V4DImode, d->op1),
+                                   GEN_INT (33)));
+      if (min < nelt / 2)
+        emit_insn (gen_avx2_palignrv2ti (target,
+                                        gen_lowpart (V2TImode, tmp),
+                                        gen_lowpart (V2TImode, d->op0),
+                                        shift));
+      else
+       emit_insn (gen_avx2_palignrv2ti (target,
+                                        gen_lowpart (V2TImode, d->op1),
+                                        gen_lowpart (V2TImode, tmp),
+                                        shift1));
+    }
+

   dcopy.op0 = dcopy.op1 = gen_lowpart (d->vmode, target);
   dcopy.one_operand_p = true;

On Tue, Apr 29, 2014 at 1:03 AM, Richard Henderson <rth@redhat.com> wrote:
> On 04/28/2014 01:43 PM, Evgeny Stupachenko wrote:
>> Agree on checks:
>>
>>   /* PALIGNR of 2 128-bits registers takes only 1 instrucion.
>>      Requires SSSE3.  */
>>   if (GET_MODE_SIZE (d->vmode) == 16)
>>     {
>>       if(!TARGET_SSSE3)
>>         return false;
>>     }
>>   /* PALIGNR of 2 256-bits registers on AVX2 costs only 2 instructions:
>>      PERM and PALIGNR.  It is more profitable than 2 PSHUFB and PERM.  */
>>   else if (GET_MODE_SIZE (d->vmode) == 32)
>>     {
>>       if(!TARGET_AVX2)
>>         return false;
>>     }
>>   else
>>     return false;
>
> Thanks, much better.
>
>
> r~

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 88142a8..91f6f21 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -42807,6 +42807,8 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d)
   return true;
 }

+static bool expand_vec_perm_vpshufb2_vpermq (struct expand_vec_perm_d *d);
+
 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to instantiate D
    in a single instruction.  */

@@ -42946,6 +42948,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
   if (expand_vec_perm_pshufb (d))
     return true;

+  /* Try the AVX2 vpshufb.  */
+  if (expand_vec_perm_vpshufb2_vpermq (d))
+    return true;
+
   /* Try the AVX512F vpermi2 instructions.  */
   rtx vec[64];
   enum machine_mode mode = d->vmode;
@@ -43004,7 +43010,7 @@ expand_vec_perm_pshuflw_pshufhw (struct
expand_vec_perm_d *d)
 }

 /* A subroutine of ix86_expand_vec_perm_builtin_1.  Try to simplify
-   the permutation using the SSSE3 palignr instruction.  This succeeds
+   the permutation using the SSSE3/AVX2 palignr instruction.  This succeeds
    when all of the elements in PERM fit within one vector and we merely
    need to shift them down so that a single vector permutation has a