From patchwork Wed Apr 16 08:12:54 2014 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: Kyrylo Tkachov X-Patchwork-Id: 339456 Return-Path: X-Original-To: incoming@patchwork.ozlabs.org Delivered-To: patchwork-incoming@bilbo.ozlabs.org Received: from sourceware.org (server1.sourceware.org [209.132.180.131]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by ozlabs.org (Postfix) with ESMTPS id D6AEA14009F for ; Wed, 16 Apr 2014 18:13:09 +1000 (EST) DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:references :in-reply-to:content-type; q=dns; s=default; b=vVnJosryHk5fD++rc wgGRYx/9uFr1dVIfO4Fb+XkR059kz/6tIDfNA/61MXjmWvZ3Lm74b8Tkvo8y/TK6 MLz1661Pq/24w43Z5IJegZdE+1v9wIerwyLdyoGuAmJD+YLvxoIM02dZ6FKXLCzt EDlrx1Cej3tNQdj83JofV0ZM8E= DKIM-Signature: v=1; a=rsa-sha1; c=relaxed; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender :message-id:date:from:mime-version:to:cc:subject:references :in-reply-to:content-type; s=default; bh=198dg6A1PcpihRjAY2V3Vi3 Fxfo=; b=Q8hi1B8LMPIJT0aSRrtnwHz7tr1maEznnxKPNp1Y+OrueR7Zm4gf3En dT+WAh1HvIuiIgfu0oR6yeZVECSTyef50GfJesYVm+yoLunlb8FJnuHR4IC+VoBI t1NBGnA8apOEZoxBZtYZluajiwMbhZQ3wd2gExPjpBBHCzHIYzcw= Received: (qmail 20704 invoked by alias); 16 Apr 2014 08:13:02 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Unsubscribe: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Delivered-To: mailing list gcc-patches@gcc.gnu.org Received: (qmail 20694 invoked by uid 89); 16 Apr 2014 08:13:01 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.1 required=5.0 tests=AWL, BAYES_00, RCVD_IN_DNSWL_LOW, SPF_PASS autolearn=ham version=3.3.2 X-HELO: 
service87.mimecast.com Received: from service87.mimecast.com (HELO service87.mimecast.com) (91.220.42.44) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with ESMTP; Wed, 16 Apr 2014 08:12:59 +0000 Received: from cam-owa2.Emea.Arm.com (fw-tnat.cambridge.arm.com [217.140.96.21]) by service87.mimecast.com; Wed, 16 Apr 2014 09:12:56 +0100 Received: from [10.1.208.24] ([10.1.255.212]) by cam-owa2.Emea.Arm.com with Microsoft SMTPSVC(6.0.3790.3959); Wed, 16 Apr 2014 09:13:09 +0100 Message-ID: <534E3B86.8090502@arm.com> Date: Wed, 16 Apr 2014 09:12:54 +0100 From: Kyrill Tkachov User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130804 Thunderbird/17.0.8 MIME-Version: 1.0 To: Eric Christopher CC: GCC Patches , Marcus Shawcroft , Richard Earnshaw Subject: Re: [PATCH][AArch64] Vectorise bswap[16,32,64] References: <534D172D.5090308@arm.com> In-Reply-To: X-MC-Unique: 114041609125601901 X-IsSubscribed: yes On 15/04/14 18:45, Eric Christopher wrote: > Testcase weirdness? > > for (i < 0; i < N; ++i) > { > arr[i] = i; > expect[i] = __builtin_bswap64 (i); > if (y) /* Avoid vectorisation. */ > abort (); > } > > i < 0 :) > > duplicated in all 3 testcases btw. Oops, here it is fixed. Thanks for catching this. Kyrill > > -eric > > > On Tue, Apr 15, 2014 at 4:25 AM, Kyrill Tkachov wrote: >> Hi all, >> >> This patch enables aarch64 to vectorise bswap[16,32,64] operations by using >> the AdvancedSIMD forms of the rev[16,32,64] instructions. >> >> The TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION hook is extended to return >> the vectorised forms of __builtin_bswap* where possible and vector bswap >> patterns are added. >> >> I've added the tests in vect.exp and a new effective target check >> (vect_bswap) that can be extended for other targets in the future if they >> can also vectorise these operations. Is that ok? >> >> Bootstrapped and tested aarch64-none-linux-gnu. >> >> Ok for trunk? 
>> >> Thanks, >> Kyrill >> >> 2014-04-15 Kyrylo Tkachov >> >> * config/aarch64/aarch64-builtins.c >> (aarch64_builtin_vectorized_function): Handle BUILT_IN_BSWAP16, >> BUILT_IN_BSWAP32, BUILT_IN_BSWAP64. >> * config/aarch64/aarch64-simd.md (bswap<mode>): New pattern. >> * config/aarch64/aarch64-simd-builtins.def: Define vector bswap >> builtins. >> * config/aarch64/iterators.md (VDQHSD): New mode iterator. >> (Vrevsuff): New mode attribute. >> >> 2014-04-15 Kyrylo Tkachov >> >> * lib/target-supports.exp (check_effective_target_vect_bswap): New. >> * gcc.dg/vect/vect-bswap16.c: New test. >> * gcc.dg/vect/vect-bswap32.c: Likewise. >> * gcc.dg/vect/vect-bswap64.c: Likewise. commit 0d6d820881443a7ce7f9bd51f35aff04866c5e57 Author: Kyrylo Tkachov Date: Thu Apr 3 09:22:14 2014 +0100 [AArch64] vectorise bswap diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c index 55cfe0a..d839a40 100644 --- a/gcc/config/aarch64/aarch64-builtins.c +++ b/gcc/config/aarch64/aarch64-builtins.c @@ -1086,7 +1086,29 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in) return aarch64_builtin_decls[builtin]; } - + case BUILT_IN_BSWAP16: +#undef AARCH64_CHECK_BUILTIN_MODE +#define AARCH64_CHECK_BUILTIN_MODE(C, N) \ + (out_mode == N##Imode && out_n == C \ + && in_mode == N##Imode && in_n == C) + if (AARCH64_CHECK_BUILTIN_MODE (4, H)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4hi]; + else if (AARCH64_CHECK_BUILTIN_MODE (8, H)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv8hi]; + else + return NULL_TREE; + case BUILT_IN_BSWAP32: + if (AARCH64_CHECK_BUILTIN_MODE (2, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2si]; + else if (AARCH64_CHECK_BUILTIN_MODE (4, S)) + return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv4si]; + else + return NULL_TREE; + case BUILT_IN_BSWAP64: + if (AARCH64_CHECK_BUILTIN_MODE (2, D)) + return 
aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOPU_bswapv2di]; + else + return NULL_TREE; default: return NULL_TREE; } diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def index c9b7570..e9736da 100644 --- a/gcc/config/aarch64/aarch64-simd-builtins.def +++ b/gcc/config/aarch64/aarch64-simd-builtins.def @@ -330,6 +330,8 @@ VAR1 (UNOP, floatunsv4si, 2, v4sf) VAR1 (UNOP, floatunsv2di, 2, v2df) + VAR5 (UNOPU, bswap, 10, v4hi, v8hi, v2si, v4si, v2di) + /* Implemented by aarch64_. */ BUILTIN_VALL (BINOP, zip1, 0) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 73aee2c..75db3e8 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -286,6 +286,14 @@ [(set_attr "type" "neon_mul_")] ) +(define_insn "bswap<mode>" + [(set (match_operand:VDQHSD 0 "register_operand" "=w") + (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))] + "TARGET_SIMD" + "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>" + [(set_attr "type" "neon_rev<q>")] +) + (define_insn "*aarch64_mul3_elt" [(set (match_operand:VMUL 0 "register_operand" "=w") (mult:VMUL diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index f1339b8..2b5ebd1 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -150,6 +150,9 @@ ;; Vector modes for H and S types. (define_mode_iterator VDQHS [V4HI V8HI V2SI V4SI]) +;; Vector modes for H, S and D types. +(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI]) + ;; Vector modes for Q, H and S types. 
(define_mode_iterator VDQQHS [V8QI V16QI V4HI V8HI V2SI V4SI]) @@ -352,6 +355,9 @@ (V2DI "2d") (V2SF "2s") (V4SF "4s") (V2DF "2d")]) +(define_mode_attr Vrevsuff [(V4HI "16") (V8HI "16") (V2SI "32") + (V4SI "32") (V2DI "64")]) + (define_mode_attr Vmtype [(V8QI ".8b") (V16QI ".16b") (V4HI ".4h") (V8HI ".8h") (V2SI ".2s") (V4SI ".4s") diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c new file mode 100644 index 0000000..b452a29 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo16 (unsigned short int* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap16 (a[i]); +} + +int +main (void) +{ + unsigned short arr[N]; + unsigned short expect[N]; + int i; + + for (i = 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap16 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo16 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap32.c b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c new file mode 100644 index 0000000..f90f853 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap32.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo32 (unsigned int* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap32 (a[i]); +} + +int +main (void) +{ + unsigned int arr[N]; + unsigned int expect[N]; + int i; + + for (i = 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap32 (i); + if (y) /* Avoid vectorisation. 
*/ + abort (); + } + + vfoo32 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap64.c b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c new file mode 100644 index 0000000..319ab34 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-bswap64.c @@ -0,0 +1,44 @@ +/* { dg-require-effective-target vect_bswap } */ + +#include "tree-vect.h" + +#define N 128 + +volatile int y = 0; + +static inline void +vfoo64 (unsigned long long* a) +{ + int i = 0; + for (i = 0; i < N; ++i) + a[i] = __builtin_bswap64 (a[i]); +} + +int +main (void) +{ + unsigned long long arr[N]; + unsigned long long expect[N]; + int i; + + for (i = 0; i < N; ++i) + { + arr[i] = i; + expect[i] = __builtin_bswap64 (i); + if (y) /* Avoid vectorisation. */ + abort (); + } + + vfoo64 (arr); + + for (i = 0; i < N; ++i) + { + if (arr[i] != expect[i]) + abort (); + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { cleanup-tree-dump "vect" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 57b10d0..910b3dc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3271,6 +3271,24 @@ proc check_effective_target_vect_shift { } { return $et_vect_shift_saved } +# Return 1 if the target supports vector bswap operations. 
+ +proc check_effective_target_vect_bswap { } { + global et_vect_bswap_saved + + if [info exists et_vect_bswap_saved] { + verbose "check_effective_target_vect_bswap: using cached result" 2 + } else { + set et_vect_bswap_saved 0 + if { [istarget aarch64*-*-*] } { + set et_vect_bswap_saved 1 + } + } + + verbose "check_effective_target_vect_bswap: returning $et_vect_bswap_saved" 2 + return $et_vect_bswap_saved +} + # Return 1 if the target supports hardware vector shift operation for char. proc check_effective_target_vect_shift_char { } {