From patchwork Mon Jan 14 18:00:50 2013
X-Patchwork-Submitter: Tejas Belagod
X-Patchwork-Id: 211854
Message-ID: <50F447D2.4070902@arm.com>
Date: Mon, 14 Jan 2013 18:00:50 +0000
From: Tejas Belagod
To: "gcc-patches@gcc.gnu.org"
CC: Marcus Shawcroft
Subject: [Patch, AArch64, AArch64-4.7] Fix sqdmulh_lane_* intrinsics.

Hi,

Attached is a patch that fixes the sqdmulh_lane_* intrinsics. Previously
they accepted a 128-bit lane index range; this patch fixes them to accept
a 64-bit lane index range. The sqdmulh_laneq_* and AdvSIMD scalar
intrinsics still accept a 128-bit lane index range, as before. A small
usage sketch illustrating the intended lane ranges follows below.
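For illustration only (this snippet is not part of the patch; the function
and variable names are made up), after the fix vqdmulh_lane_s16 takes a
64-bit (int16x4_t) second operand with lane indices 0-3, while
vqdmulh_laneq_s16 keeps its 128-bit (int16x8_t) operand with lanes 0-7:

#include <arm_neon.h>

/* Hypothetical example: doubling-saturating-high multiply of 'a' by
   coefficients held in a 64-bit and a 128-bit vector.  */
int16x4_t
example_q15 (int16x4_t a, int16x4_t coeffs_d, int16x8_t coeffs_q)
{
  int16x4_t x = vqdmulh_lane_s16 (a, coeffs_d, 3);   /* lane range 0..3 */
  int16x4_t y = vqdmulh_laneq_s16 (a, coeffs_q, 7);  /* lane range 0..7 */
  return vqadd_s16 (x, y);                           /* saturating add  */
}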
The patch has passed regression testing on aarch64-none-elf.

OK for trunk and aarch64-4.7-branch?

Thanks,
Tejas Belagod
ARM.

Changelog

2013-01-14  Tejas Belagod

gcc/
	* config/aarch64/aarch64-simd-builtins.def: Separate sqdmulh_lane
	entries into lane and laneq entries.
	* config/aarch64/aarch64-simd.md (aarch64_sqdmulh_lane): Remove
	AdvSIMD scalar modes.
	(aarch64_sqdmulh_laneq): New.
	(aarch64_sqdmulh_lane): New RTL pattern for AdvSIMD scalar modes.
	* config/aarch64/arm_neon.h: Fix all the vqdmulh_lane* intrinsics'
	builtin implementations to reflect the RTL changes in aarch64-simd.md.
	* config/aarch64/iterators.md (VCOND): New.
	(VCONQ): New.

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index d441417..af27079 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -142,9 +142,13 @@
   /* Implemented by aarch64_sqdmulh.  */
   BUILTIN_VSDQ_HSI (BINOP, sqdmulh)
   BUILTIN_VSDQ_HSI (BINOP, sqrdmulh)
-  /* Implemented by aarch64_sqdmulh_lane.  */
-  BUILTIN_VSDQ_HSI (TERNOP, sqdmulh_lane)
-  BUILTIN_VSDQ_HSI (TERNOP, sqrdmulh_lane)
+  /* Implemented by aarch64_sqdmulh_lane.  */
+  BUILTIN_VDQHS (TERNOP, sqdmulh_lane)
+  BUILTIN_VDQHS (TERNOP, sqdmulh_laneq)
+  BUILTIN_VDQHS (TERNOP, sqrdmulh_lane)
+  BUILTIN_VDQHS (TERNOP, sqrdmulh_laneq)
+  BUILTIN_SD_HSI (TERNOP, sqdmulh_lane)
+  BUILTIN_SD_HSI (TERNOP, sqrdmulh_lane)

   BUILTIN_VSDQ_I_DI (BINOP, sshl_n)
   BUILTIN_VSDQ_I_DI (BINOP, ushl_n)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 63716c3..1fc912c 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -2068,17 +2068,49 @@
 ;; sqdmulh_lane

 (define_insn "aarch64_sqdmulh_lane"
-  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
-        (unspec:VSDQ_HSI
-          [(match_operand:VSDQ_HSI 1 "register_operand" "w")
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+        (unspec:VDQHS
+          [(match_operand:VDQHS 1 "register_operand" "w")
+           (vec_select:
+             (match_operand: 2 "register_operand" "")
+             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+         VQDMULH))]
+  "TARGET_SIMD"
+  "*
+   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode));
+   return \"sqdmulh\\t%0., %1., %2.[%3]\";"
+  [(set_attr "simd_type" "simd_sat_mul")
+   (set_attr "simd_mode" "")]
+)
+
+(define_insn "aarch64_sqdmulh_laneq"
+  [(set (match_operand:VDQHS 0 "register_operand" "=w")
+        (unspec:VDQHS
+          [(match_operand:VDQHS 1 "register_operand" "w")
+           (vec_select:
+             (match_operand: 2 "register_operand" "")
+             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
+         VQDMULH))]
+  "TARGET_SIMD"
+  "*
+   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode));
+   return \"sqdmulh\\t%0., %1., %2.[%3]\";"
+  [(set_attr "simd_type" "simd_sat_mul")
+   (set_attr "simd_mode" "")]
+)
+
+(define_insn "aarch64_sqdmulh_lane"
+  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
+        (unspec:SD_HSI
+          [(match_operand:SD_HSI 1 "register_operand" "w")
            (vec_select:
-             (match_operand: 2 "register_operand" "")
+             (match_operand: 2 "register_operand" "")
              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
          VQDMULH))]
   "TARGET_SIMD"
   "*
-   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode));
-   return \"sqdmulh\\t%0, %1, %2.[%3]\";"
+   aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode));
+   return \"sqdmulh\\t%0, %1, %2.[%3]\";"
   [(set_attr "simd_type" "simd_sat_mul")
    (set_attr "simd_mode" "")]
 )
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 5a72029..c455cf0 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -18877,49 +18877,49 @@ vpaddd_s64 (int64x2_t __a)

 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv4hi (__a, __b, __c);
 }

 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv2si (__a, __b, __c);
 }

 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv8hi (__a, __b, __c);
 }

 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
+  return __builtin_aarch64_sqdmulh_laneqv4si (__a, __b, __c);
 }

 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqrdmulh_laneq_s16 (int16x4_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv4hi (__a, __b, __c);
 }

 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqrdmulh_laneq_s32 (int32x2_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv2si (__a, __b, __c);
 }

 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqrdmulhq_laneq_s16 (int16x8_t __a, int16x8_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv8hi (__a, __b, __c);
 }

 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqrdmulhq_laneq_s32 (int32x4_t __a, int32x4_t __b, const int __c)
 {
-  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
+  return __builtin_aarch64_sqrdmulh_laneqv4si (__a, __b, __c);
 }

 /* Table intrinsics.  */
@@ -21974,29 +21974,25 @@ vqdmlsls_lane_s32 (int64x1_t __a, int32x1_t __b, int32x4_t __c, const int __d)

 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev4hi (__a, __b, __c);
 }

 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev2si (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev2si (__a, __b, __c);
 }

 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev8hi (__a, __b, __c);
 }

 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqdmulh_lanev4si (__a, __tmp, __c);
+  return __builtin_aarch64_sqdmulh_lanev4si (__a, __b, __c);
 }

 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
@@ -22290,29 +22286,25 @@ vqnegs_s32 (int32x1_t __a)

 __extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
 vqrdmulh_lane_s16 (int16x4_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev4hi (__a, __b, __c);
 }

 __extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
 vqrdmulh_lane_s32 (int32x2_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev2si (__a, __b, __c);
 }

 __extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
 vqrdmulhq_lane_s16 (int16x8_t __a, int16x4_t __b, const int __c)
 {
-  int16x8_t __tmp = vcombine_s16 (__b, vcreate_s16 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev8hi (__a, __b, __c);
 }

 __extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
 vqrdmulhq_lane_s32 (int32x4_t __a, int32x2_t __b, const int __c)
 {
-  int32x4_t __tmp = vcombine_s32 (__b, vcreate_s32 (INT64_C (0)));
-  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __tmp, __c);
+  return __builtin_aarch64_sqrdmulh_lanev4si (__a, __b, __c);
 }

 __extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 0eb30f0..f193214 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -337,6 +337,22 @@
                         (QI "QI")])

 ;; Define container mode for lane selection.
+(define_mode_attr VCOND [(V4HI "V4HI") (V8HI "V4HI")
+                         (V2SI "V2SI") (V4SI "V2SI")
+                         (DI "DI") (V2DI "DI")
+                         (V2SF "V2SF") (V4SF "V2SF")
+                         (V2DF "DF")])
+
+;; Define container mode for lane selection.
+(define_mode_attr VCONQ [(V8QI "V16QI") (V16QI "V16QI")
+                         (V4HI "V8HI") (V8HI "V8HI")
+                         (V2SI "V4SI") (V4SI "V4SI")
+                         (DI "V2DI") (V2DI "V2DI")
+                         (V2SF "V2SF") (V4SF "V4SF")
+                         (V2DF "V2DF") (SI "V4SI")
+                         (HI "V8HI") (QI "V16QI")])
+
+;; Define container mode for lane selection.
 (define_mode_attr VCON [(V8QI "V16QI") (V16QI "V16QI")
                         (V4HI "V8HI") (V8HI "V8HI")
                         (V2SI "V4SI") (V4SI "V4SI")