From patchwork Thu Sep 18 19:38:28 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Charles Baylis X-Patchwork-Id: 390941 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id F361E140186 for ; Fri, 19 Sep 2014 05:41:40 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references; q=dns; s= default; b=RgtKDohY3X8dkZsFbp+nkoLI9tLKuNXaxRcCWW3/d+xhr7e+TVupV 5opWjueQLe3r4YZVXSv/zm+lWSf5trFJw9Oqq5nsxR8EaRTbEu/We+oPmnVK6/zk CuP1TKQIipqwjq+DaDVfDwVzNzjy7SlzcjtkbPfGR7tgKnpe6ekOF8= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:subject:date:message-id:in-reply-to:references; s=default; bh=vHoA7J5mhEqeXaicZdfyC124Lu0=; b=O/GX0ftjH8Cgi0bcjOT9/rF52dRA +TOAMXKySJTgH3JnPaCDW2zzJH7Bp38e5qy8mUjt1E1wsSiKOlt3/1sqzZGjjFp+ MCHFhEyGBAGBrM+dJsPCMEu9h/4RtQ/FgedKDU6ueH3jsSDEbxo/Ew5/06f4e91A mZYYHc6p6xRsK2Q= Received: (qmail 30370 invoked by alias); 18 Sep 2014 19:40:59 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 30303 invoked by uid 89); 18 Sep 2014 19:40:58 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.6 required=5.0 tests=BAYES_00, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-pd0-f180.google.com Received: from mail-pd0-f180.google.com (HELO mail-pd0-f180.google.com) (209.85.192.180) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-SHA encrypted) ESMTPS; Thu, 18 Sep 2014 19:40:54 +0000 Received: by mail-pd0-f180.google.com with SMTP id ft15so2024716pdb.25 for ; Thu, 18 Sep 2014 12:40:52 -0700 (PDT) X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20130820; h=x-gm-message-state:from:to:subject:date:message-id:in-reply-to :references; bh=IOD3Xb0xPeDVd/eQo8zH1n0eEpl/ZIRoERnYC+jirLI=; b=VX0niz6e4Y3/bk/xhSK6KpY45Bv7qUoVa1OHFdW1z9C95pKlKoPNrpC32fjw5UB87e BCO94xXcjfT/ixppo9xD1dyM1qnqDhwVbgUCBy/UY6B15Mtybjr0yEqZdZkEujHobylP nU6n3kOHrrVwEozISrQLSDsRS2smjUiwjz2jEooT1Giy4Bvi4VwTyZ2Y+41BVxwB3vkq LfmaT70n1oonHgWCOhuV2FJOxLl/K0OqsbSyskQEEEWMRn/OGWbgtFSI165414BWV2OJ 3B7P4oQii2NX/DfnIj1q23Et+x5W49XbVcTdtMCtXP/huPTrNFSq/sCcXtkqHDGCa885 SUEA== X-Gm-Message-State: ALoCoQn0Q5RCfoSwQnYXOXBIWB9RUHgBo2JOJN6D/KA+87bybRs46fb4O9BejfC0zUd/ssNyis8v X-Received: by 10.68.194.194 with SMTP id hy2mr8970778pbc.149.1411069252553; Thu, 18 Sep 2014 12:40:52 -0700 (PDT) Received: from sale.swisscom.com (70-35-38-154.static.wiline.com. [70.35.38.154]) by mx.google.com with ESMTPSA id f12sm20996103pat.36.2014.09.18.12.40.49 for (version=TLSv1.2 cipher=ECDHE-RSA-AES128-SHA bits=128/128); Thu, 18 Sep 2014 12:40:51 -0700 (PDT) From: Charles Baylis To: marcus.shawcroft@arm.com, rearnsha@arm.com, gcc-patches@gcc.gnu.org Subject: [PATCH 3/4] [AARCH64, NEON] Fix unnecessary moves in vld[234]q_* intrinsics Date: Thu, 18 Sep 2014 20:38:28 +0100 Message-Id: <1411069109-31425-4-git-send-email-charles.baylis@linaro.org> In-Reply-To: <1411069109-31425-1-git-send-email-charles.baylis@linaro.org> References: <1411069109-31425-1-git-send-email-charles.baylis@linaro.org> X-IsSubscribed: yes This patch improves code generation of vld[234]q_* intrinsics by avoiding use of the __builtin_aarch64_get_qreg_* builtins to generate a temporary result variable. Instead, a union is used for type-punning, which avoids generation of some unnecessary move instructions. This idiom is already used in several other intrinsics. This patch is independent of the previous patches in the series. Tested (with the rest of the patch series) with make check on aarch64-oe-linux with qemu, and also causes no regressions in clyon's NEON intrinsics tests. Charles Baylis * config/aarch64/arm_neon.h (vld2q_s8, vld2q_p8, vld2q_s16, vld2q_p16, vld2q_s32, vld2q_s64, vld2q_u8, vld2q_u16, vld2q_u32, vld2q_u64, vld2q_f32, vld2q_f64, vld3q_s8, vld3q_p8, vld3q_s16, vld3q_p16, vld3q_s32, vld3q_s64, vld3q_u8, vld3q_u16, vld3q_u32, vld3q_u64, vld3q_f32, vld3q_f64, vld4q_s8, vld4q_p8, vld4q_s16, vld4q_p16, vld4q_s32, vld4q_s64, vld4q_u8, vld4q_u16, vld4q_u32, vld4q_u64, vld4q_f32, vld4q_f64): Use type-punning to convert between NEON intrinsic types and __builtin_aarch64_simd* types. Change-Id: I61efa29138b13c7a83679885343211d604a73b15 --- gcc/config/aarch64/arm_neon.h | 396 +++++++++++++++--------------------------- 1 file changed, 144 insertions(+), 252 deletions(-) diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h index c1fcb47..87e3baf 100644 --- a/gcc/config/aarch64/arm_neon.h +++ b/gcc/config/aarch64/arm_neon.h @@ -16969,133 +16969,109 @@ vld2_f32 (const float32_t * __a) __extension__ static __inline int8x16x2_t __attribute__ ((__always_inline__)) vld2q_s8 (const int8_t * __a) { - int8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + union { int8x16x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline poly8x16x2_t __attribute__ ((__always_inline__)) vld2q_p8 (const poly8_t * __a) { - poly8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + union { poly8x16x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline int16x8x2_t __attribute__ ((__always_inline__)) vld2q_s16 (const int16_t * __a) { - int16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + union { int16x8x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline poly16x8x2_t __attribute__ ((__always_inline__)) vld2q_p16 (const poly16_t * __a) { - poly16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + union { poly16x8x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline int32x4x2_t __attribute__ ((__always_inline__)) vld2q_s32 (const int32_t * __a) { - int32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + union { int32x4x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + return __temp.__i; } __extension__ static __inline int64x2x2_t __attribute__ ((__always_inline__)) vld2q_s64 (const int64_t * __a) { - int64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + union { int64x2x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + return __temp.__i; } __extension__ static __inline uint8x16x2_t __attribute__ ((__always_inline__)) vld2q_u8 (const uint8_t * __a) { - uint8x16x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregoiv16qi (__o, 1); - return ret; + union { uint8x16x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline uint16x8x2_t __attribute__ ((__always_inline__)) vld2q_u16 (const uint16_t * __a) { - uint16x8x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregoiv8hi (__o, 1); - return ret; + union { uint16x8x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline uint32x4x2_t __attribute__ ((__always_inline__)) vld2q_u32 (const uint32_t * __a) { - uint32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregoiv4si (__o, 1); - return ret; + union { uint32x4x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v4si ((const __builtin_aarch64_simd_si *) __a); + return __temp.__i; } __extension__ static __inline uint64x2x2_t __attribute__ ((__always_inline__)) vld2q_u64 (const uint64_t * __a) { - uint64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregoiv2di (__o, 1); - return ret; + union { uint64x2x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v2di ((const __builtin_aarch64_simd_di *) __a); + return __temp.__i; } __extension__ static __inline float32x4x2_t __attribute__ ((__always_inline__)) vld2q_f32 (const float32_t * __a) { - float32x4x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregoiv4sf (__o, 1); - return ret; + union { float32x4x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v4sf ((const __builtin_aarch64_simd_sf *) __a); + return __temp.__i; } __extension__ static __inline float64x2x2_t __attribute__ ((__always_inline__)) vld2q_f64 (const float64_t * __a) { - float64x2x2_t ret; - __builtin_aarch64_simd_oi __o; - __o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregoiv2df (__o, 1); - return ret; + union { float64x2x2_t __i; + __builtin_aarch64_simd_oi __o; } __temp; + __temp.__o = __builtin_aarch64_ld2v2df ((const __builtin_aarch64_simd_df *) __a); + return __temp.__i; } __extension__ static __inline int64x1x3_t __attribute__ ((__always_inline__)) @@ -17245,145 +17221,109 @@ vld3_f32 (const float32_t * __a) __extension__ static __inline int8x16x3_t __attribute__ ((__always_inline__)) vld3q_s8 (const int8_t * __a) { - int8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + union { int8x16x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline poly8x16x3_t __attribute__ ((__always_inline__)) vld3q_p8 (const poly8_t * __a) { - poly8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + union { poly8x16x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline int16x8x3_t __attribute__ ((__always_inline__)) vld3q_s16 (const int16_t * __a) { - int16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + union { int16x8x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline poly16x8x3_t __attribute__ ((__always_inline__)) vld3q_p16 (const poly16_t * __a) { - poly16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + union { poly16x8x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline int32x4x3_t __attribute__ ((__always_inline__)) vld3q_s32 (const int32_t * __a) { - int32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + union { int32x4x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + return __temp.__i; } __extension__ static __inline int64x2x3_t __attribute__ ((__always_inline__)) vld3q_s64 (const int64_t * __a) { - int64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + union { int64x2x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + return __temp.__i; } __extension__ static __inline uint8x16x3_t __attribute__ ((__always_inline__)) vld3q_u8 (const uint8_t * __a) { - uint8x16x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregciv16qi (__o, 2); - return ret; + union { uint8x16x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline uint16x8x3_t __attribute__ ((__always_inline__)) vld3q_u16 (const uint16_t * __a) { - uint16x8x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregciv8hi (__o, 2); - return ret; + union { uint16x8x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline uint32x4x3_t __attribute__ ((__always_inline__)) vld3q_u32 (const uint32_t * __a) { - uint32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregciv4si (__o, 2); - return ret; + union { uint32x4x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v4si ((const __builtin_aarch64_simd_si *) __a); + return __temp.__i; } __extension__ static __inline uint64x2x3_t __attribute__ ((__always_inline__)) vld3q_u64 (const uint64_t * __a) { - uint64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregciv2di (__o, 2); - return ret; + union { uint64x2x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v2di ((const __builtin_aarch64_simd_di *) __a); + return __temp.__i; } __extension__ static __inline float32x4x3_t __attribute__ ((__always_inline__)) vld3q_f32 (const float32_t * __a) { - float32x4x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregciv4sf (__o, 2); - return ret; + union { float32x4x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v4sf ((const __builtin_aarch64_simd_sf *) __a); + return __temp.__i; } __extension__ static __inline float64x2x3_t __attribute__ ((__always_inline__)) vld3q_f64 (const float64_t * __a) { - float64x2x3_t ret; - __builtin_aarch64_simd_ci __o; - __o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregciv2df (__o, 2); - return ret; + union { float64x2x3_t __i; + __builtin_aarch64_simd_ci __o; } __temp; + __temp.__o = __builtin_aarch64_ld3v2df ((const __builtin_aarch64_simd_df *) __a); + return __temp.__i; } __extension__ static __inline int64x1x4_t __attribute__ ((__always_inline__)) @@ -17545,157 +17485,109 @@ vld4_f32 (const float32_t * __a) __extension__ static __inline int8x16x4_t __attribute__ ((__always_inline__)) vld4q_s8 (const int8_t * __a) { - int8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (int8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + union { int8x16x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline poly8x16x4_t __attribute__ ((__always_inline__)) vld4q_p8 (const poly8_t * __a) { - poly8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (poly8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + union { poly8x16x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline int16x8x4_t __attribute__ ((__always_inline__)) vld4q_s16 (const int16_t * __a) { - int16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (int16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + union { int16x8x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline poly16x8x4_t __attribute__ ((__always_inline__)) vld4q_p16 (const poly16_t * __a) { - poly16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (poly16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + union { poly16x8x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline int32x4x4_t __attribute__ ((__always_inline__)) vld4q_s32 (const int32_t * __a) { - int32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (int32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + union { int32x4x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + return __temp.__i; } __extension__ static __inline int64x2x4_t __attribute__ ((__always_inline__)) vld4q_s64 (const int64_t * __a) { - int64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (int64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + union { int64x2x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + return __temp.__i; } __extension__ static __inline uint8x16x4_t __attribute__ ((__always_inline__)) vld4q_u8 (const uint8_t * __a) { - uint8x16x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); - ret.val[0] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 0); - ret.val[1] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 1); - ret.val[2] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 2); - ret.val[3] = (uint8x16_t) __builtin_aarch64_get_qregxiv16qi (__o, 3); - return ret; + union { uint8x16x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v16qi ((const __builtin_aarch64_simd_qi *) __a); + return __temp.__i; } __extension__ static __inline uint16x8x4_t __attribute__ ((__always_inline__)) vld4q_u16 (const uint16_t * __a) { - uint16x8x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); - ret.val[0] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 0); - ret.val[1] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 1); - ret.val[2] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 2); - ret.val[3] = (uint16x8_t) __builtin_aarch64_get_qregxiv8hi (__o, 3); - return ret; + union { uint16x8x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v8hi ((const __builtin_aarch64_simd_hi *) __a); + return __temp.__i; } __extension__ static __inline uint32x4x4_t __attribute__ ((__always_inline__)) vld4q_u32 (const uint32_t * __a) { - uint32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); - ret.val[0] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 0); - ret.val[1] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 1); - ret.val[2] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 2); - ret.val[3] = (uint32x4_t) __builtin_aarch64_get_qregxiv4si (__o, 3); - return ret; + union { uint32x4x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v4si ((const __builtin_aarch64_simd_si *) __a); + return __temp.__i; } __extension__ static __inline uint64x2x4_t __attribute__ ((__always_inline__)) vld4q_u64 (const uint64_t * __a) { - uint64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); - ret.val[0] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 0); - ret.val[1] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 1); - ret.val[2] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 2); - ret.val[3] = (uint64x2_t) __builtin_aarch64_get_qregxiv2di (__o, 3); - return ret; + union { uint64x2x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v2di ((const __builtin_aarch64_simd_di *) __a); + return __temp.__i; } __extension__ static __inline float32x4x4_t __attribute__ ((__always_inline__)) vld4q_f32 (const float32_t * __a) { - float32x4x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); - ret.val[0] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 0); - ret.val[1] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 1); - ret.val[2] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 2); - ret.val[3] = (float32x4_t) __builtin_aarch64_get_qregxiv4sf (__o, 3); - return ret; + union { float32x4x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v4sf ((const __builtin_aarch64_simd_sf *) __a); + return __temp.__i; } __extension__ static __inline float64x2x4_t __attribute__ ((__always_inline__)) vld4q_f64 (const float64_t * __a) { - float64x2x4_t ret; - __builtin_aarch64_simd_xi __o; - __o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); - ret.val[0] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 0); - ret.val[1] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 1); - ret.val[2] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 2); - ret.val[3] = (float64x2_t) __builtin_aarch64_get_qregxiv2df (__o, 3); - return ret; + union { float64x2x4_t __i; + __builtin_aarch64_simd_xi __o; } __temp; + __temp.__o = __builtin_aarch64_ld4v2df ((const __builtin_aarch64_simd_df *) __a); + return __temp.__i; } /* vmax */