From patchwork Mon Aug 4 16:31:44 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kyrylo Tkachov X-Patchwork-Id: 376366 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 992C0140078 for ; Tue, 5 Aug 2014 02:31:58 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:content-type; q=dns; s=default; b=V/bvXYfgVX1fHNjNln1XdBOmVM4Sur3ky/CraNHoTgi dhzUrExmQ45XUotayHAOTYogKrqjl0+paAtDXYz6blGR9IIcJtM1xTZlG+dlQPpe 869CMWtvZb6nuixOxFGA6GwirqA1lxroindfXYde1lXKejL271Sj9Jqr3U4MzLdI = DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:content-type; s=default; bh=n2ct7oPWEtwM5ZPut68dogzNeHg=; b=g9K/8Uuj5Pee6K4sS OYnHoetW6by11s60pWMyR9kl7wlD0OongHhywUWjcJ8qopJoXnd5O4s9XXlhkK1x leZhh8dszvaexNBN87bjfDLn7zW8etndCuhNR1w4JMorx94KB9l+HrrB3k4czs4a Gvo8kALqBinr+u92IWIkGkpECs= Received: (qmail 30071 invoked by alias); 4 Aug 2014 16:31:52 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 30054 invoked by uid 89); 4 Aug 2014 16:31:51 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=AWL, BAYES_00, SPF_PASS autolearn=ham version=3.3.2 X-HELO: service87.mimecast.com Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Mon, 04 Aug 2014 16:31:49 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Mon, 04 Aug 2014 17:31:46 +0100 Received: from [10.1.208.24] ([10.1.255.212]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Mon, 4 Aug 2014 17:31:45 +0100 Message-ID: <53DFB570.50504@arm.com> Date: Mon, 04 Aug 2014 17:31:44 +0100 From: Kyrill Tkachov User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0 MIME-Version: 1.0 To: GCC Patches CC: Marcus Shawcroft , Richard Earnshaw Subject: [PATCH][AArch64] Implement some vmul*_lane*_f* intrinsics in arm_neon.h X-MC-Unique: 114080417314600801 X-IsSubscribed: yes Hi all, As part of other intrinsics-related messing around due to the float64x1_t changes I noticed these can be (re)implemented relatively easily. Tested on aarch64-none-elf and aarch64_be-none-elf to make sure the lane-wise intrinsics do the right thing. Ok for trunk? 2014-08-04 Kyrylo Tkachov * config/aarch64/arm_neon.h (vmul_f64): New intrinsic. (vmuld_laneq_f64): Likewise. (vmuls_laneq_f32): Likewise. (vmul_n_f64): Likewise. (vmuld_lane_f64): Reimplement in C. (vmuls_lane_f32): Likewise. 2014-08-04 Kyrylo Tkachov * gcc.target/aarch64/simd/vmul_f64_1.c: New test. * gcc.target/aarch64/simd/vmul_n_f64_1.c: Likewise. * gcc.target/aarch64/simd/vmuld_lane_f64_1.c: Likewise. * gcc.target/aarch64/simd/vmuld_laneq_f64_1.c: Likewise. * gcc.target/aarch64/simd/vmuls_lane_f32_1.c: Likewise. * gcc.target/aarch64/simd/vmuls_laneq_f32_1.c: Likewise. commit c1ba193030ba81fd69669036c7f706a957f44b5d Author: Kyrylo Tkachov Date: Wed Jun 25 15:00:35 2014 +0100 [Needs-tests][AArch64] Implement some vmul* intrinsics diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 3e26345..b23fa64 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -1277,6 +1277,12 @@ vmul_f32 (float32x2_t __a, float32x2_t __b) return __a * __b; } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmul_f64 (float64x1_t __a, float64x1_t __b) +{ + return __a * __b; +} + __extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) vmul_u8 (uint8x8_t __a, uint8x8_t __b) { @@ -8299,19 +8305,6 @@ vmul_n_u32 (uint32x2_t a, uint32_t b) return result; } -#define vmuld_lane_f64(a, b, c) \ - __extension__ \ - ({ \ - float64x2_t b_ = (b); \ - float64_t a_ = (a); \ - float64_t result; \ - __asm__ ("fmul %d0,%d1,%2.d[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - #define vmull_high_lane_s16(a, b, c) \ __extension__ \ ({ \ @@ -8828,19 +8821,6 @@ vmulq_n_u32 (uint32x4_t a, uint32_t b) return result; } -#define vmuls_lane_f32(a, b, c) \ - __extension__ \ - ({ \ - float32x4_t b_ = (b); \ - float32_t a_ = (a); \ - float32_t result; \ - __asm__ ("fmul %s0,%s1,%2.s[%3]" \ - : "=w"(result) \ - : "w"(a_), "w"(b_), "i"(c) \ - : /* No clobbers */); \ - result; \ - }) - __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vmulx_f32 (float32x2_t a, float32x2_t b) { @@ -19041,6 +19021,34 @@ vmul_lane_u32 (uint32x2_t __a, uint32x2_t __b, const int __lane) return __a * __aarch64_vget_lane_u32 (__b, __lane); } +/* vmuld_lane */ + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmuld_lane_f64 (float64_t __a, float64x1_t __b, const int __lane) +{ + return __a * vget_lane_f64 (__b, __lane); +} + +__extension__ static __inline float64_t __attribute__ ((__always_inline__)) +vmuld_laneq_f64 (float64_t __a, float64x2_t __b, const int __lane) +{ + return __a * vgetq_lane_f64 (__b, __lane); +} + +/* vmuls_lane */ + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmuls_lane_f32 (float32_t __a, float32x2_t __b, const int __lane) +{ + return __a * vget_lane_f32 (__b, __lane); +} + +__extension__ static __inline float32_t __attribute__ ((__always_inline__)) +vmuls_laneq_f32 (float32_t __a, float32x4_t __b, const int __lane) +{ + return __a * vgetq_lane_f32 (__b, __lane); +} + /* vmul_laneq */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -19079,6 +19087,14 @@ vmul_laneq_u32 (uint32x2_t __a, uint32x4_t __b, const int __lane) return __a * __aarch64_vgetq_lane_u32 (__b, __lane); } +/* vmul_n */ + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmul_n_f64 (float64x1_t __a, float64_t __b) +{ + return (float64x1_t) { vget_lane_f64 (__a, 0) * __b }; +} + /* vmulq_lane */ __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmul_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmul_f64_1.c new file mode 100644 index 0000000..8308175 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmul_f64_1.c @@ -0,0 +1,30 @@ +/* Test the vmul_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + volatile float64_t minus_e, pi; + float64_t expected, actual; + + pi = 3.14159265359; + minus_e = -2.71828; + + expected = pi * minus_e; + + actual = vget_lane_f64 (vmul_f64 ((float64x1_t) { pi }, + (float64x1_t) { minus_e }), 0); + if (expected != actual) + abort (); + + return 0; +} + +/* { dg-final { scan-assembler "fmul\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmul_n_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmul_n_f64_1.c new file mode 100644 index 0000000..f8f3cd2 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmul_n_f64_1.c @@ -0,0 +1,27 @@ +/* Test the vmul_n_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + volatile float64_t minus_e, pi; + float64_t expected, actual; + + pi = 3.14159265359; + minus_e = -2.71828; + + expected = pi * minus_e; + + actual = vget_lane_f64 (vmul_n_f64 ((float64x1_t) { pi }, + minus_e), 0); + if (expected != actual) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmuld_lane_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmuld_lane_f64_1.c new file mode 100644 index 0000000..818abb0 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmuld_lane_f64_1.c @@ -0,0 +1,26 @@ +/* Test the vmuld_lane_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options " -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + volatile float64_t minus_e, pi; + float64_t expected, actual; + + pi = 3.14159265359; + minus_e = -2.71828; + + expected = pi * minus_e; + + actual = vmuld_lane_f64 (pi, (float64x1_t) { minus_e }, 0); + if (expected != actual) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmuld_laneq_f64_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmuld_laneq_f64_1.c new file mode 100644 index 0000000..9848b6f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmuld_laneq_f64_1.c @@ -0,0 +1,36 @@ +/* Test the vmuld_laneq_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options " -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + volatile float64_t minus_e, pi, ln2; + float64_t expected, actual; + float64x2_t arg2; + float64_t arr[2]; + + pi = 3.14159265359; + arr[0] = minus_e = -2.71828; + arr[1] = ln2 = 0.69314718056; + + arg2 = vld1q_f64 (arr); + actual = vmuld_laneq_f64 (pi, arg2, 0); + expected = pi * minus_e; + + if (expected != actual) + abort (); + + expected = pi * ln2; + actual = vmuld_laneq_f64 (pi, arg2, 1); + + if (expected != actual) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmuls_lane_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmuls_lane_f32_1.c new file mode 100644 index 0000000..6cd1147 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmuls_lane_f32_1.c @@ -0,0 +1,36 @@ +/* Test the vmuls_lane_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options " -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + volatile float32_t minus_e, pi, ln2; + float32_t expected, actual; + float32x2_t arg2; + float32_t arr[2]; + + pi = 3.14159265359; + arr[0] = minus_e = -2.71828; + arr[1] = ln2 = 0.69314718056; + + arg2 = vld1_f32 (arr); + actual = vmuls_lane_f32 (pi, arg2, 0); + expected = pi * minus_e; + + if (expected != actual) + abort (); + + expected = pi * ln2; + actual = vmuls_lane_f32 (pi, arg2, 1); + + if (expected != actual) + abort (); + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmuls_laneq_f32_1.c b/gcc/testsuite/gcc.target/aarch64/simd/vmuls_laneq_f32_1.c new file mode 100644 index 0000000..5b88e89 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmuls_laneq_f32_1.c @@ -0,0 +1,50 @@ +/* Test the vmuls_laneq_f32 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options " -O3" } */ + +#include "arm_neon.h" + +extern void abort (void); + +int +main (void) +{ + volatile float32_t minus_e, pi, ln2, sqrt2, phi; + float32_t expected, actual; + float32x4_t arg2; + float32_t arr[4]; + + pi = 3.14159265359; + arr[0] = minus_e = -2.71828; + arr[1] = ln2 = 0.69314718056; + arr[2] = sqrt2 = 1.41421356237; + arr[3] = phi = 1.61803398874; + + arg2 = vld1q_f32 (arr); + actual = vmuls_laneq_f32 (pi, arg2, 0); + expected = pi * minus_e; + + if (expected != actual) + abort (); + + expected = pi * ln2; + actual = vmuls_laneq_f32 (pi, arg2, 1); + + if (expected != actual) + abort (); + + expected = pi * sqrt2; + actual = vmuls_laneq_f32 (pi, arg2, 2); + + if (expected != actual) + abort (); + + expected = pi * phi; + actual = vmuls_laneq_f32 (pi, arg2, 3); + + if (expected != actual) + abort (); + + return 0; +}