From patchwork Tue Aug 12 10:12:11 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Alan Lawrence X-Patchwork-Id: 379284 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id 4F32614010C for ; Tue, 12 Aug 2014 20:12:36 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:references :in-reply-to:content-type; q=dns; s=default; b=WC5i85NIhJ8NewiKx okrJfPTF2+YtYm150+YtCnb7VRtc8OljFQxOXjqSi3UOv3s0mo5iIb9Cl9pr0m2p jAEAW0twz6g0ncLFJJRyWrVEHLPqX/f03UkdKTubBITE/KhitxklldjaX483/cPN vNX7Gr7U/XxEGzWZjdb73+7XJk= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:subject:references :in-reply-to:content-type; s=default; bh=WHP7Q7Twd88PQ6NEWV+TyHZ lNZs=; b=YYkMsi6YK7rBgasFkLn5ifHXj7rNyW2QXO0/iOEwkZxmrKMIXT/ULOa VntTmdhPwf9YNsoZxxZVsPtnPmgANSzCMoNOf2gWFLK8msWZ+kOMtW3e+1Q2aD1C Mrl4uw8mw+rjO0vbuk+AQPHOBz1RrLZ89fq6zRVUYyX+duv6ZJHU= Received: (qmail 12533 invoked by alias); 12 Aug 2014 10:12:25 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 12485 invoked by uid 89); 12 Aug 2014 10:12:20 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-1.9 required=5.0 tests=AWL, BAYES_00, SPF_PASS autolearn=ham version=3.3.2 X-HELO: service87.mimecast.com Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Tue, 12 Aug 2014 10:12:16 +0000 Received: from cam-owa1.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Tue, 12 Aug 2014 11:12:13 +0100 Received: from [10.1.209.51] ([10.1.255.212]) by cam-owa1.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Tue, 12 Aug 2014 11:12:12 +0100 Message-ID: <53E9E87B.4080700@arm.com> Date: Tue, 12 Aug 2014 11:12:11 +0100 From: Alan Lawrence User-Agent: Thunderbird 2.0.0.24 (X11/20101213) MIME-Version: 1.0 To: "gcc-patches@gcc.gnu.org" Subject: [PATCH AArch64 2/2] Replace temporary inline assembler for vget_high References: <53E9E499.4050806@arm.com> In-Reply-To: <53E9E499.4050806@arm.com> X-MC-Unique: 114081211121301501 X-IsSubscribed: yes This patch replaces the current inline assembler for the vget_high intrinsics in arm_neon.h with a sequence of other calls, in a similar fashion to vget_low. Unlike the assembler, these are all transparent to the front-end, so should enable better optimization through the mid-end. Tested check-gcc and check-g++ and aarch64-none-elf and aarch64_be-none-elf (including new tests in previous patch!). diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index 45f545779292bbd3ceb1b9e13c980988e52bc3ec..706e07c8cda5d1513a6ea9817f6054cfb2258d48 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -4322,6 +4322,85 @@ vget_low_u64 (uint64x2_t __a) #undef __GET_LOW +#define __GET_HIGH(__TYPE) \ + uint64x2_t tmp = vreinterpretq_u64_##__TYPE (__a); \ + uint64x1_t hi = vcreate_u64 (vgetq_lane_u64 (tmp, 1)); \ + return vreinterpret_##__TYPE##_u64 (hi); + +__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) +vget_high_f32 (float32x4_t __a) +{ + __GET_HIGH (f32); +} + +__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) +vget_high_f64 (float64x2_t __a) +{ + __GET_HIGH (f64); +} + +__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) +vget_high_p8 (poly8x16_t __a) +{ + __GET_HIGH (p8); +} + +__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) +vget_high_p16 (poly16x8_t __a) +{ + __GET_HIGH (p16); +} + +__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) +vget_high_s8 (int8x16_t __a) +{ + __GET_HIGH (s8); +} + +__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) +vget_high_s16 (int16x8_t __a) +{ + __GET_HIGH (s16); +} + +__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) +vget_high_s32 (int32x4_t __a) +{ + __GET_HIGH (s32); +} + +__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) +vget_high_s64 (int64x2_t __a) +{ + __GET_HIGH (s64); +} + +__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) +vget_high_u8 (uint8x16_t __a) +{ + __GET_HIGH (u8); +} + +__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) +vget_high_u16 (uint16x8_t __a) +{ + __GET_HIGH (u16); +} + +__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) +vget_high_u32 (uint32x4_t __a) +{ + __GET_HIGH (u32); +} + +#undef __GET_HIGH + +__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) +vget_high_u64 (uint64x2_t __a) +{ + return vcreate_u64 (vgetq_lane_u64 (__a, 1)); +} + __extension__ static __inline int8x16_t __attribute__ ((__always_inline__)) vcombine_s8 (int8x8_t __a, int8x8_t __b) { @@ -5764,138 +5843,6 @@ vfmsq_f64 (float64x2_t a, float64x2_t b, float64x2_t c) return result; } -__extension__ static __inline float32x2_t __attribute__ ((__always_inline__)) -vget_high_f32 (float32x4_t a) -{ - float32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline float64x1_t __attribute__ ((__always_inline__)) -vget_high_f64 (float64x2_t a) -{ - float64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__)) -vget_high_p8 (poly8x16_t a) -{ - poly8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__)) -vget_high_p16 (poly16x8_t a) -{ - poly16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) -vget_high_s8 (int8x16_t a) -{ - int8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int16x4_t __attribute__ ((__always_inline__)) -vget_high_s16 (int16x8_t a) -{ - int16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int32x2_t __attribute__ ((__always_inline__)) -vget_high_s32 (int32x4_t a) -{ - int32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline int64x1_t __attribute__ ((__always_inline__)) -vget_high_s64 (int64x2_t a) -{ - int64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__)) -vget_high_u8 (uint8x16_t a) -{ - uint8x8_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__)) -vget_high_u16 (uint16x8_t a) -{ - uint16x4_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__)) -vget_high_u32 (uint32x4_t a) -{ - uint32x2_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - -__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__)) -vget_high_u64 (uint64x2_t a) -{ - uint64x1_t result; - __asm__ ("ins %0.d[0], %1.d[1]" - : "=w"(result) - : "w"(a) - : /* No clobbers */); - return result; -} - __extension__ static __inline int8x8_t __attribute__ ((__always_inline__)) vhsub_s8 (int8x8_t a, int8x8_t b) {