From patchwork Wed Apr 22 17:19:18 2015 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Lawrence X-Patchwork-Id: 463733 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id D2F28140083 for ; Thu, 23 Apr 2015 03:19:31 +1000 (AEST) Authentication-Results: ozlabs.org; dkim=pass reason="1024-bit key; unprotected key" header.d=gcc.gnu.org header.i=@gcc.gnu.org header.b=SpCoyBLg; dkim-adsp=none (unprotected policy); dkim-atps=neutral DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:in-reply-to :content-type; q=dns; s=default; b=g/W3NoiY9q86SmnX98gtE+GnX+tGm lV61/nIZxZoa3Y4C9GV5Pv6NgHQwcO5/D+6ntWUtqTpX+ftZ55Jn8UzJd1maplWa vVSmWIJMrqPZD1hRg0J8Qlam8gkF7E1D53Y8nFRLt3y8Syn44td+xUtCIiq+HjLT gaND/f9CmiIQgY= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:in-reply-to :content-type; s=default; bh=nUw6A+omVcuWj4FdgYsNwrzTU3U=; b=SpC oyBLg0Z0POtIMJOcbC5V5jSI94kINDZg5/tooQnGKJsz1Wy3Wmu7Hwq2uvxpQQ8y 8BemUiH9oebsec4LNOGR4gH+XKW8Z1ALZqYunJNHv1/r38ULENTU2mv2Wez00Txx BO56qTxYKN0Wb/Em3m8k0+EsOKggpEleWt4cgkKw= Received: (qmail 60223 invoked by alias); 22 Apr 2015 17:19:23 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 60214 invoked by uid 89); 22 Apr 2015 17:19:23 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.8 required=5.0 tests=AWL, BAYES_00, SPF_PASS autolearn=ham version=3.3.2 X-HELO: eu-smtp-delivery-143.mimecast.com Received: from eu-smtp-delivery-143.mimecast.com (HELO eu-smtp-delivery-143.mimecast.com) (146.101.78.143) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 22 Apr 2015 17:19:21 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.140]) by uk-mta-9.uk.mimecast.lan; Wed, 22 Apr 2015 18:19:19 +0100 Received: from [10.2.207.65] ([10.1.2.79]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Wed, 22 Apr 2015 18:19:19 +0100 Message-ID: <5537D816.70208@arm.com> Date: Wed, 22 Apr 2015 18:19:18 +0100 From: Alan Lawrence User-Agent: Thunderbird 2.0.0.24 (X11/20101213) MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" Subject: [PATCH 9/14][AArch64] vld1(q?)_dup, missing vreinterpretq intrinsics In-Reply-To: <5537D241.1000606@arm.com> X-MC-Unique: fZ5PVhWSS6icyxXM7DMjUQ-1 X-IsSubscribed: yes gcc/ChangeLog: * config/aarch64/arm_neon.h (vreinterpretq_p8_f16, vreinterpretq_p16_f16, vreinterpretq_f32_f16, vreinterpretq_f64_f16, vreinterpretq_s64_f16, vreinterpretq_s8_f16, vreinterpretq_s16_f16, vreinterpretq_s32_f16, vreinterpretq_u8_f16, vreinterpretq_u16_f16, vreinterpretq_u32_f16, vld1_dup_f16, vld1q_dup_f16): New. diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 88723231e5c32faf3bc68eccdf4e3a2b104b57b9..6d98b2e08221c3e25c4f66e6058b4f228d90a094 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -2993,6 +2993,12 @@ vreinterpretq_p8_s64 (int64x2_t __a) } __extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_p8_f16 (float16x8_t __a) +{ + return (poly8x16_t) __a; +} + +__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__)) vreinterpretq_p8_f32 (float32x4_t __a) { return (poly8x16_t) __a; @@ -3131,6 +3137,12 @@ vreinterpretq_p16_s64 (int64x2_t __a) } __extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_p16_f16 (float16x8_t __a) +{ + return (poly16x8_t) __a; +} + +__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__)) vreinterpretq_p16_f32 (float32x4_t __a) { return (poly16x8_t) __a; @@ -3383,6 +3395,12 @@ vreinterpret_f32_p16 (poly16x4_t __a) } __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_f32_f16 (float16x8_t __a) +{ + return (float32x4_t) __a; +} + +__extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vreinterpretq_f32_f64 (float64x2_t __a) { return (float32x4_t) __a; @@ -3521,6 +3539,12 @@ vreinterpret_f64_u64 (uint64x1_t __a) } __extension__ static __inline float64x2_t __attribute__((__always_inline__)) +vreinterpretq_f64_f16 (float16x8_t __a) +{ + return (float64x2_t) __a; +} + +__extension__ static __inline float64x2_t __attribute__((__always_inline__)) vreinterpretq_f64_f32 (float32x4_t __a) { return (float64x2_t) __a; @@ -3683,6 +3707,12 @@ vreinterpretq_s64_s32 (int32x4_t __a) } __extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) +vreinterpretq_s64_f16 (float16x8_t __a) +{ + return (int64x2_t) __a; +} + +__extension__ static __inline int64x2_t __attribute__ ((__always_inline__)) vreinterpretq_s64_f32 (float32x4_t __a) { return (int64x2_t) __a; @@ -3965,6 +3995,12 @@ vreinterpretq_s8_s64 (int64x2_t __a) } __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_s8_f16 (float16x8_t __a) +{ + return (int8x16_t) __a; +} + +__extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vreinterpretq_s8_f32 (float32x4_t __a) { return (int8x16_t) __a; @@ -4103,6 +4139,12 @@ vreinterpretq_s16_s64 (int64x2_t __a) } __extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_s16_f16 (float16x8_t __a) +{ + return (int16x8_t) __a; +} + +__extension__ static __inline int16x8_t __attribute__ ((__always_inline__)) vreinterpretq_s16_f32 (float32x4_t __a) { return (int16x8_t) __a; @@ -4241,6 +4283,12 @@ vreinterpretq_s32_s64 (int64x2_t __a) } __extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_s32_f16 (float16x8_t __a) +{ + return (int32x4_t) __a; +} + +__extension__ static __inline int32x4_t __attribute__ ((__always_inline__)) vreinterpretq_s32_f32 (float32x4_t __a) { return (int32x4_t) __a; @@ -4385,6 +4433,12 @@ vreinterpretq_u8_s64 (int64x2_t __a) } __extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) +vreinterpretq_u8_f16 (float16x8_t __a) +{ + return (uint8x16_t) __a; +} + +__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__)) vreinterpretq_u8_f32 (float32x4_t __a) { return (uint8x16_t) __a; @@ -4523,6 +4577,12 @@ vreinterpretq_u16_s64 (int64x2_t __a) } __extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) +vreinterpretq_u16_f16 (float16x8_t __a) +{ + return (uint16x8_t) __a; +} + +__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__)) vreinterpretq_u16_f32 (float32x4_t __a) { return (uint16x8_t) __a; @@ -4661,6 +4721,12 @@ vreinterpretq_u32_s64 (int64x2_t __a) } __extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) +vreinterpretq_u32_f16 (float16x8_t __a) +{ + return (uint32x4_t) __a; +} + +__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__)) vreinterpretq_u32_f32 (float32x4_t __a) { return (uint32x4_t) __a; @@ -15107,6 +15173,13 @@ vld1q_u64 (const uint64_t *a) /* vld1_dup */ +__extension__ static __inline float16x4_t __attribute__ ((__always_inline__)) +vld1_dup_f16 (const float16_t* __a) +{ + float16_t __f = *__a; + return (float16x4_t) { __f, __f, __f, __f }; +} + __extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) vld1_dup_f32 (const float32_t* __a) { @@ -15181,6 +15254,13 @@ vld1_dup_u64 (const uint64_t* __a) /* vld1q_dup */ +__extension__ static __inline float16x8_t __attribute__ ((__always_inline__)) +vld1q_dup_f16 (const float16_t* __a) +{ + float16_t __f = *__a; + return (float16x8_t) { __f, __f, __f, __f, __f, __f, __f, __f }; +} + __extension__ static __inline float32x4_t __attribute__ ((__always_inline__)) vld1q_dup_f32 (const float32_t* __a) {