From patchwork Thu Sep 18 12:34:24 2014
X-Patchwork-Submitter: Alan Lawrence
X-Patchwork-Id: 390798
Message-ID: <541AD150.7090009@arm.com>
Date: Thu, 18 Sep 2014 13:34:24 +0100
From: Alan Lawrence
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Subject: [PATCH 10/14][AArch64] Implement vec_shr optab
References: <541AC4D2.9040901@arm.com>
In-Reply-To: <541AC4D2.9040901@arm.com>

This allows reduction of non-(plus|min|max) operations using log_2(N) shifts
rather than N vec_extracts; e.g. for example code

int main (unsigned char argc, char **argv)
{
  unsigned char in[16] = { 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31 };
  unsigned char i = 0;
  unsigned char sum = 1;

  /* Prevent constant propagation of the entire loop below.  */
  asm volatile ("" : : : "memory");

  for (i = 0; i < 16; i++)
    sum *= in[i];

  if (sum != 33)
    __builtin_printf("Failed %d\n", sum);
}

(a simplified, less-general version of vect-reduc-mul_1.c)

this gives

main:
        ldr     q0, .LC0
        sub     sp, sp, #16
        str     q0, [sp]
        ldr     q1, [sp]
        movi    v0.4s, 0
        ext     v2.16b, v1.16b, v0.16b, #8
        mul     v1.16b, v1.16b, v2.16b
        ext     v2.16b, v1.16b, v0.16b, #4
        mul     v1.16b, v2.16b, v1.16b
        ext     v2.16b, v1.16b, v0.16b, #2
        mul     v1.16b, v2.16b, v1.16b
        ext     v0.16b, v1.16b, v0.16b, #1
        mul     v0.16b, v0.16b, v1.16b
        umov    w1, v0.b[0]
        cmp     w1, 33
        beq     .L2
        ...

rather than previously:

main:
        ldr     q0, .LC0
        sub     sp, sp, #16
        str     q0, [sp]
        ldr     d1, [sp]
        ldr     d0, [sp, 8]
        mul     v0.8b, v0.8b, v1.8b
        umov    w0, v0.b[1]
        umov    w3, v0.b[0]
        umov    w2, v0.b[2]
        umov    w7, v0.b[3]
        umov    w6, v0.b[4]
        mul     w3, w0, w3
        umov    w5, v0.b[5]
        umov    w4, v0.b[6]
        umov    w1, v0.b[7]
        mul     w3, w3, w2
        mul     w2, w3, w7
        mul     w2, w2, w6
        mul     w0, w2, w5
        mul     w0, w0, w4
        mul     w1, w0, w1
        uxtb    w1, w1
        cmp     w1, 33
        beq     .L2
        ...
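To make the shift-based strategy concrete, here is a minimal scalar sketch (not
part of the patch) of the same log_2(N) reduction. The helper vec_shr_bytes
below stands in for the vec_shr optab (zero-filling, like the EXT-with-zero
sequence above); its name and the overall structure are illustrative
assumptions, not vectorizer output.

#include <stdio.h>
#include <string.h>

#define N 16

/* Stand-in for the vec_shr optab: shift the whole "vector" right by
   nbytes, filling the vacated upper lanes with zero (for 128-bit modes
   the EXT-with-a-zero-register sequence above has the same effect).  */
static void
vec_shr_bytes (unsigned char *v, int nbytes)
{
  memmove (v, v + nbytes, N - nbytes);
  memset (v + N - nbytes, 0, nbytes);
}

int
main (void)
{
  unsigned char v[N] = { 1,3,5,7,9,11,13,15,17,19,21,23,25,27,29,31 };
  unsigned char tmp[N];
  unsigned char scalar = 1;
  int i, j;

  /* Scalar reference result: N-1 multiplies (mod 256).  */
  for (i = 0; i < N; i++)
    scalar *= v[i];

  /* Shift-based reduction: log_2(N) = 4 shift-and-multiply steps.
     Only lane 0 of the final vector is meaningful; the zeroed upper
     lanes never feed back into it.  */
  for (i = N / 2; i >= 1; i /= 2)
    {
      memcpy (tmp, v, N);
      vec_shr_bytes (tmp, i);
      for (j = 0; j < N; j++)
        v[j] *= tmp[j];          /* lane-wise multiply, like MUL Vd.16B.  */
    }

  printf ("shift-reduced %d, scalar %d\n", v[0], scalar);
  return v[0] != scalar;
}

Each step halves the number of live lanes, which is why the new code above
needs only four EXT/MUL pairs where the old sequence needed a chain of lane
extractions and scalar multiplies.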
Tested check-gcc on aarch64-none-elf and aarch64_be-none-elf. (Including new
tests from previous patches.)

gcc/ChangeLog:

	* config/aarch64/aarch64-simd.md (vec_shr<mode>): New (*2).

gcc/testsuite/ChangeLog:

	* lib/target-supports.exp (check_effective_target_whole_vector_shift):
	Add aarch64*-*-*.

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index d4a745be59897b4cb2a0de23adb56b5d79203592..3fcf809113d73b37a95653b8c2be432478d2bc1e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -770,6 +770,45 @@
 }
 )
 
+;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
+(define_insn "vec_shr_<mode>"
+  [(set (match_operand:VD 0 "register_operand" "=w")
+        (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
+                     (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+  "ushr %d0, %d1, %2"
+  [(set_attr "type" "neon_shift_imm")]
+)
+
+(define_expand "vec_shr_<mode>"
+  [(set (match_operand:VQ 0 "register_operand" "=w")
+        (lshiftrt:VQ (match_operand:VQ 1 "register_operand" "w")
+                     (match_operand:SI 2 "immediate_operand" "i")))]
+  "TARGET_SIMD"
+{
+  HOST_WIDE_INT num_bits = INTVAL (operands[2]);
+  HOST_WIDE_INT elem_bits = GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
+  rtx zero_reg = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
+
+  gcc_assert (GET_MODE_BITSIZE (<MODE>mode) == 128);
+  gcc_assert (num_bits % elem_bits == 0);
+
+  if (num_bits == 0)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      DONE;
+    }
+  else if (num_bits == 128)
+    {
+      emit_move_insn (operands[0], CONST0_RTX (<MODE>mode));
+      DONE;
+    }
+
+  emit_insn (gen_aarch64_ext<mode> (operands[0], operands[1], zero_reg,
+                                    GEN_INT (num_bits / elem_bits)));
+  DONE;
+})
+
 (define_insn "aarch64_simd_vec_setv2di"
   [(set (match_operand:V2DI 0 "register_operand" "=w,w")
         (vec_merge:V2DI
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 5e40f5fcdfc95e41e804075bb5daa7030eb9bc66..720cc345bf6a76470cc85116d7b3365be07caa97 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3323,6 +3323,7 @@ proc check_effective_target_vect_shift { } {
 proc check_effective_target_whole_vector_shift { } {
     if { [istarget x86_64-*-*]
          || [istarget ia64-*-*]
+         || [istarget aarch64*-*-*]
          || ([check_effective_target_arm32]
              && [check_effective_target_arm_little_endian])
          || ([istarget mips*-*-*]