From patchwork Tue Aug 19 17:02:35 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Lawrence X-Patchwork-Id: 381408 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 390071400E4 for ; Wed, 20 Aug 2014 03:04:56 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; q= dns; s=default; b=xjBJJS1ReHdziTKf00gTeIA20g16nHdfCuf492WT4zajhL mEXETBJLKP/0f3JB4K/y4POghiWnShJ0SLE2VvC8ZO4sc8yh4HIoRrjt3nZ/+uwE 9M9D2cJh5L4hw76YWo5FD9DTE0Cn3tNOqtYaRcufyli3ArlhMwA00HGyMfihY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:content-type; s= default; bh=xBlX6uVCzU4gnlgmEbzEL+20TG4=; b=UIOVv7LXQvmhHnRDEuhb Hs0QT4p5ydbyAlnKQpf+mj76e/GLmFSbMWgMMLW2uZyXtDou4qpTFo0RhHMjdDN5 HSUdnl8rOM2QGmjl5Pboxquu/QXsp7hSnLZdUpbuNtRozTYSvI+lpbq4k0y0xxOj +fMfgQ+NTvLLntlGTM+z+Ns= Received: (qmail 26410 invoked by alias); 19 Aug 2014 17:02:42 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 26384 invoked by uid 89); 19 Aug 2014 17:02:42 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=AWL, BAYES_00, SPF_PASS autolearn=ham version=3.3.2 X-HELO: service87.mimecast.com Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 19 Aug 2014 17:02:40 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Tue, 19 Aug 2014 18:02:38 +0100 Received: from [10.1.209.51] ([10.1.255.212]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Tue, 19 Aug 2014 18:02:36 +0100 Message-ID: <53F3832B.2090302@arm.com> Date: Tue, 19 Aug 2014 18:02:35 +0100 From: Alan Lawrence User-Agent: Thunderbird 2.0.0.24 (X11/20101213) MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" Subject: [PATCH AArch64] Add a builtin for rbit(q?)_p8; add intrinsics and tests. X-MC-Unique: 114081918023801601 X-IsSubscribed: yes This patch adds the missing vrbit_p8 and vrbitq_p8 intrinsics to arm_neon.h, and implements all the vrbit(q?)_[psu]8 intrinsics using a new builtin, rather than the previous temporary asm. Also adds a testcase checking (a) execution results and (b) that we output rbit vXX.8b,vYY.8b or corresponding with .16b. Tested on aarch64-none-elf and aarch64_be-none-elf. gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_rbit): New pattern. * config/aarch64/aarch64-simd-builtins.def (rbit): New builtin. * config/aarch64/arm_neon.h (vrbit_s8, vrbit_u8, vrbitq_s8, vrbitq_u8): Replace temporary asm with call to builtin. (vrbit_p8, vrbitq_p8): New functions. gcc/testsuite/ChangeLog: * gcc.target/aarch64/simd/vrbit_1.c: New test. diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index 268432cc117b7027ee9472fc5a4f9b1ea13bea0f..3b985b3176ff8bc50bd60105e8a2b1a983d54982 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -347,6 +347,8 @@ VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) + BUILTIN_VB (UNOP, rbit, 0) + /* Implemented by aarch64_. */ BUILTIN_VALL (BINOP, zip1, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 1c32f0c4efa0e9b8e8bc06af726798f6aaecf39f..9997cdf8fd0269a0447edd8ce30515730d73d301 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -294,6 +294,15 @@ [(set_attr "type" "neon_rev")] ) +(define_insn "aarch64_rbit" + [(set (match_operand:VB 0 "register_operand" "=w") + (unspec:VB [(match_operand:VB 1 "register_operand" "w")] + UNSPEC_RBIT))] + "TARGET_SIMD" + "rbit\\t%0., %1." + [(set_attr "type" "neon_rbit")] +) + (define_insn "*aarch64_mul3_elt" [(set (match_operand:VMUL 0 "register_operand" "=w") (mult:VMUL diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index d5d8c23acd75b6f2a4e8cd6cc4daca418372f883..626f418f1e6e49d4969119f43fd620d78b2c055a 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -10477,50 +10477,6 @@ vqrdmulhq_n_s32 (int32x4_t a, int32_t b) result; \ }) -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vrbit_s8 (int8x8_t a) -{ - int8x8_t result; - __asm__ ("rbit %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vrbit_u8 (uint8x8_t a) -{ - uint8x8_t result; - __asm__ ("rbit %0.8b,%1.8b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) -vrbitq_s8 (int8x16_t a) -{ - int8x16_t result; - __asm__ ("rbit %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) -vrbitq_u8 (uint8x16_t a) -{ - uint8x16_t result; - __asm__ ("rbit %0.16b,%1.16b" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) vrecpe_u32 (uint32x2_t a) { @@ -20632,6 +20588,44 @@ vqsubd_u64 (uint64_t __a, uint64_t __b) return __builtin_aarch64_uqsubdi_uuu (__a, __b); } +/* vrbit */ + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vrbit_p8 (poly8x8_t __a) +{ + return (poly8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vrbit_s8 (int8x8_t __a) +{ + return __builtin_aarch64_rbitv8qi (__a); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vrbit_u8 (uint8x8_t __a) +{ + return (uint8x8_t) __builtin_aarch64_rbitv8qi ((int8x8_t) __a); +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vrbitq_p8 (poly8x16_t __a) +{ + return (poly8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t)__a); +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vrbitq_s8 (int8x16_t __a) +{ + return __builtin_aarch64_rbitv16qi (__a); +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vrbitq_u8 (uint8x16_t __a) +{ + return (uint8x16_t) __builtin_aarch64_rbitv16qi ((int8x16_t) __a); +} + /* vrecpe */ __extension__ static __inline float32_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c new file mode 100644 index 0000000000000000000000000000000000000000..77d13d48660e165cf113e3cd9c61d63ff2b4843e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vrbit_1.c @@ -0,0 +1,56 @@ +/* { dg-do run } */ +/* { dg-options "-O2 --save-temps -fno-inline" } */ + +#include + +extern void abort (void); + +uint64_t in1 = 0x0123456789abcdefULL; +uint64_t expected1 = 0x80c4a2e691d5b3f7ULL; + +#define TEST8(BASETYPE, SUFFIX) \ +void test8_##SUFFIX () \ +{ \ + BASETYPE##8x8_t out = vrbit_##SUFFIX (vcreate_##SUFFIX (in1)); \ + uint64_t res = vget_lane_u64 (vreinterpret_u64_##SUFFIX (out), 0); \ + if (res != expected1) abort (); \ +} + +uint64_t in2 = 0xdeadbeefcafebabeULL; +uint64_t expected2 = 0x7bb57df7537f5d7dULL; + +#define TEST16(BASETYPE, SUFFIX) \ +void test16_##SUFFIX () \ +{ \ + BASETYPE##8x16_t in = vcombine_##SUFFIX (vcreate_##SUFFIX (in1), \ + vcreate_##SUFFIX (in2)); \ + uint64x2_t res = vreinterpretq_u64_##SUFFIX (vrbitq_##SUFFIX (in)); \ + uint64_t res1 = vgetq_lane_u64 (res, 0); \ + uint64_t res2 = vgetq_lane_u64 (res, 1); \ + if (res1 != expected1 || res2 != expected2) abort (); \ +} + +TEST8 (poly, p8); +TEST8 (int, s8); +TEST8 (uint, u8); + +TEST16 (poly, p8); +TEST16 (int, s8); +TEST16 (uint, u8); + +int +main (int argc, char **argv) +{ + test8_p8 (); + test8_s8 (); + test8_u8 (); + test16_p8 (); + test16_s8 (); + test16_u8 (); + return 0; +} + +/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.8\[bB\], ?\[vV\]\[0-9\]+\.8\[bB\]" 3 } } */ +/* { dg-final { scan-assembler-times "rbit\[ \t\]+\[vV\]\[0-9\]+\.16\[bB\], ?\[vV\]\[0-9\]+\.16\[bB\]" 3 } } */ + +/* { dg-final { cleanup-saved-temps } } */