From patchwork Fri Jun 20 14:17:22 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kyrylo Tkachov X-Patchwork-Id: 362269 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 6BADB14008B for ; Sat, 21 Jun 2014 00:17:37 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:content-type; q=dns; s=default; b=H3p9aiMhfPfSoaY/PEtJale6wdsVAcvrbZR4irv6rxo oo648/3pkKjA1fmNRB0TJF0L03TDfmyG2AHARQUVuk7AG4RCaET3Ef93l8wStNkZ R261QSZKVZeteZfhqWDA4I6Yof8NY9PEGqz7aYODPye2vZljj/3Sx6//vIhNNdeQ = DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:content-type; s=default; bh=Hli0oHjrR5mLLIzh/MP4vSF4xDo=; b=kuYGPe35jqmKoI/Ha KcUbkvBQ1PCWTslbhgfJ3Kq3AY+TJnsYs5IbKuBDYxAUittJkW5Qyvs8XdO5DECf 2KcjcfS5ZfRzBaZXUEOwqjSSCXdh9caYEXaLneOrHQ/LPkc0EnWElQwuMwfTnjJJ IXMLowP/7/OLi+dhOIJ6cFgNoU= Received: (qmail 1254 invoked by alias); 20 Jun 2014 14:17:29 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 1240 invoked by uid 89); 20 Jun 2014 14:17:27 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: service87.mimecast.com Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Fri, 20 Jun 2014 14:17:26 +0000 Received: from cam-owa2.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Fri, 20 Jun 2014 15:17:23 +0100 Received: from [10.1.208.24] ([10.1.255.212]) by cam-owa2.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Fri, 20 Jun 2014 15:17:14 +0100 Message-ID: <53A44272.5020005@arm.com> Date: Fri, 20 Jun 2014 15:17:22 +0100 From: Kyrill Tkachov User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:24.0) Gecko/20100101 Thunderbird/24.5.0 MIME-Version: 1.0 To: GCC Patches CC: Marcus Shawcroft Subject: [PATCH][AArch64] Implement vfma_f64, vmla_f64, vfms_f64, vmls_f64 intrinsics X-MC-Unique: 114062015172321101 X-IsSubscribed: yes Hi all, Now that Alan fixed the float64x1_t machinery, this patch implements some low-hanging intrinsics in arm_neon.h. Tested aarch64-none-elf and bootstrapped on aarch64-linux. Ok for trunk? Thanks, Kyrill 2014-06-20 Kyrylo Tkachov * config/aarch64/arm_neon.h (vfma_f64): New intrinsic. (vmla_f64): Likewise. (vfms_f64): Likewise. (vmls_f64): Likewise. 2014-06-20 Kyrylo Tkachov * gcc.target/aarch64/simd/vfma_f64.c: New test. * gcc.target/aarch64/simd/vmla_f64.c: Likewise. * gcc.target/aarch64/simd/vfms_f64.c: Likewise. * gcc.target/aarch64/simd/vmls_f64.c: Likewise. commit ffb5a3efe38e50c0d410b5517e030aa37cad88b7 Author: Kyrylo Tkachov Date: Wed Jun 18 14:25:17 2014 +0100 [AArch64] Implement vfma_f64, vmla_f64, vfms_f64, vmls_f64 in arm_neon.h diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index fd520f5..2809b3e 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -16701,6 +16701,14 @@ vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c) #endif } +/* vfma */ + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vfma_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +{ + return (float64x1_t) {__builtin_fma (__b[0], __c[0], __a[0])}; +} + /* vfma_lane */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -16804,6 +16812,14 @@ vfmaq_laneq_f64 (float64x2_t __a, float64x2_t __b, __a); } +/* vfms */ + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vfms_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +{ + return (float64x1_t) {__builtin_fma (-__b[0], __c[0], __a[0])}; +} + /* vfms_lane */ __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) @@ -18432,6 +18448,12 @@ vmla_f32 (float32x2_t a, float32x2_t b, float32x2_t c) return a + b * c; } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmla_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +{ + return __a + __b * __c; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlaq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) { @@ -18600,6 +18622,12 @@ vmls_f32 (float32x2_t a, float32x2_t b, float32x2_t c) return a - b * c; } +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vmls_f64 (float64x1_t __a, float64x1_t __b, float64x1_t __c) +{ + return __a - __b * __c; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vmlsq_f32 (float32x4_t a, float32x4_t b, float32x4_t c) { diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vfma_f64.c b/gcc/testsuite/gcc.target/aarch64/simd/vfma_f64.c new file mode 100644 index 0000000..d6bcf1c --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vfma_f64.c @@ -0,0 +1,41 @@ +/* Test the vfma_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" +#include + +#define EPS 1.0e-15 + + +extern void abort (void); + +int +main (void) +{ + float64x1_t arg1; + float64x1_t arg2; + float64x1_t arg3; + + float64_t expected; + float64_t actual; + + arg1 = vcreate_f64 (0x3fe3955382d35b0eULL); + arg2 = vcreate_f64 (0x3fa88480812d6670ULL); + arg3 = vcreate_f64 (0x3fd5791ae2a92572ULL); + + expected = 0.6280448184360076; + actual = vget_lane_f64 (vfma_f64 (arg1, arg2, arg3), 0); + + if (__builtin_fabs (expected - actual) > EPS) + { + fprintf (stderr, "Expected: %lf, got %lf\n", expected, actual); + abort (); + } + + return 0; +} + +/* { dg-final { scan-assembler-times "fmadd\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vfms_f64.c b/gcc/testsuite/gcc.target/aarch64/simd/vfms_f64.c new file mode 100644 index 0000000..3f34758 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vfms_f64.c @@ -0,0 +1,41 @@ +/* Test the vfms_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-save-temps -O3" } */ + +#include "arm_neon.h" +#include + +#define EPS 1.0e-15 + + +extern void abort (void); + +int +main (void) +{ + float64x1_t arg1; + float64x1_t arg2; + float64x1_t arg3; + + float64_t expected; + float64_t actual; + + arg1 = vcreate_f64 (0x3fe730af8db9e6f7ULL); + arg2 = vcreate_f64 (0x3fe6b78680fa29ceULL); + arg3 = vcreate_f64 (0x3feea3cbf921fbe0ULL); + + expected = 4.4964705746355915e-2; + actual = vget_lane_f64 (vfms_f64 (arg1, arg2, arg3), 0); + + if (__builtin_fabs (expected - actual) > EPS) + { + fprintf (stderr, "Expected: %lf, got %lf\n", expected, actual); + abort (); + } + + return 0; +} + +/* { dg-final { scan-assembler-times "fmsub\[ \t\]+\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+, ?\[dD\]\[0-9\]+\n" 1 } } */ +/* { dg-final { cleanup-saved-temps } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmla_f64.c b/gcc/testsuite/gcc.target/aarch64/simd/vmla_f64.c new file mode 100644 index 0000000..59ce326 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmla_f64.c @@ -0,0 +1,37 @@ +/* Test the vmla_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include "arm_neon.h" +#include + +#define EPS 1.0e-15 + +extern void abort (void); + +int +main (void) +{ + float64x1_t arg1; + float64x1_t arg2; + float64x1_t arg3; + + float64_t expected; + float64_t actual; + + arg1 = vcreate_f64 (0x3fc4de626b6bbe9cULL); + arg2 = vcreate_f64 (0x3fb7e454dbe84408ULL); + arg3 = vcreate_f64 (0x3fdd359b94201a3aULL); + + expected = 0.20563116414665633; + actual = vget_lane_f64 (vmla_f64 (arg1, arg2, arg3), 0); + + if (__builtin_fabs (expected - actual) > EPS) + { + fprintf (stderr, "Expected: %lf, got %lf\n", expected, actual); + abort (); + } + + return 0; +} diff --git a/gcc/testsuite/gcc.target/aarch64/simd/vmls_f64.c b/gcc/testsuite/gcc.target/aarch64/simd/vmls_f64.c new file mode 100644 index 0000000..9163609 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/simd/vmls_f64.c @@ -0,0 +1,37 @@ +/* Test the vmls_f64 AArch64 SIMD intrinsic. */ + +/* { dg-do run } */ +/* { dg-options "-O3" } */ + +#include "arm_neon.h" +#include + +#define EPS 1.0e-15 + +extern void abort (void); + +int +main (void) +{ + float64x1_t arg1; + float64x1_t arg2; + float64x1_t arg3; + + float64_t expected; + float64_t actual; + + arg1 = vcreate_f64 (0x3fea7ec860271ad9ULL); + arg2 = vcreate_f64 (0x3fca04faa09302e8ULL); + arg3 = vcreate_f64 (0x3fecfec8c67415a0ULL); + + expected = 0.6437868393361155; + actual = vget_lane_f64 (vmls_f64 (arg1, arg2, arg3), 0); + + if (__builtin_fabs (expected - actual) > EPS) + { + fprintf (stderr, "Expected: %lf, got %lf\n", expected, actual); + abort (); + } + + return 0; +}